summavy/tokenizer/
ascii_folding_filter.rs

1use std::mem;
2
3use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
4
/// Token filter that converts alphabetic, numeric, and symbolic Unicode
/// characters which are not in the first 128 ASCII characters (the
/// "Basic Latin" Unicode block, `U+0000..=U+007F`) into their ASCII
/// equivalents, if one exists.
///
/// The filter itself is stateless; all per-stream state lives in the token
/// stream returned by its `TokenFilter::transform` implementation.
#[derive(Clone, Debug, Default)]
pub struct AsciiFoldingFilter;
10
11impl TokenFilter for AsciiFoldingFilter {
12    fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
13        From::from(AsciiFoldingFilterTokenStream {
14            tail: token_stream,
15            buffer: String::with_capacity(100),
16        })
17    }
18}
19
20pub struct AsciiFoldingFilterTokenStream<'a> {
21    buffer: String,
22    tail: BoxTokenStream<'a>,
23}
24
25impl<'a> TokenStream for AsciiFoldingFilterTokenStream<'a> {
26    fn advance(&mut self) -> bool {
27        if !self.tail.advance() {
28            return false;
29        }
30        if !self.token_mut().text.is_ascii() {
31            // ignore its already ascii
32            to_ascii(&self.tail.token().text, &mut self.buffer);
33            mem::swap(&mut self.tail.token_mut().text, &mut self.buffer);
34        }
35        true
36    }
37
38    fn token(&self) -> &Token {
39        self.tail.token()
40    }
41
42    fn token_mut(&mut self) -> &mut Token {
43        self.tail.token_mut()
44    }
45}
46
// Returns a string that represents the ASCII-folded version of
// the character. If the `char` does not require ASCII folding
// (e.g. simple ASCII chars like `A`) or if the `char`
// does not have a sensible ASCII equivalent (e.g. Kanji like 馬),
// this function returns `None`.
52fn fold_non_ascii_char(c: char) -> Option<&'static str> {
53    match c {
54        '\u{00C0}' | // À  [LATIN CAPITAL LETTER A WITH GRAVE]
55        '\u{00C1}' | // Á  [LATIN CAPITAL LETTER A WITH ACUTE]
56        '\u{00C2}' | // Â  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
57        '\u{00C3}' | // Ã  [LATIN CAPITAL LETTER A WITH TILDE]
58        '\u{00C4}' | // Ä  [LATIN CAPITAL LETTER A WITH DIAERESIS]
59        '\u{00C5}' | // Å  [LATIN CAPITAL LETTER A WITH RING ABOVE]
60        '\u{0100}' | // Ā  [LATIN CAPITAL LETTER A WITH MACRON]
61        '\u{0102}' | // Ă  [LATIN CAPITAL LETTER A WITH BREVE]
62        '\u{0104}' | // Ą  [LATIN CAPITAL LETTER A WITH OGONEK]
63        '\u{018F}' | // Ə  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA]
64        '\u{01CD}' | // Ǎ  [LATIN CAPITAL LETTER A WITH CARON]
65        '\u{01DE}' | // Ǟ  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
66        '\u{01E0}' | // Ǡ  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
67        '\u{01FA}' | // Ǻ  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
68        '\u{0200}' | // Ȁ  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
69        '\u{0202}' | // Ȃ  [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
70        '\u{0226}' | // Ȧ  [LATIN CAPITAL LETTER A WITH DOT ABOVE]
71        '\u{023A}' | // Ⱥ  [LATIN CAPITAL LETTER A WITH STROKE]
72        '\u{1D00}' | // ᴀ  [LATIN LETTER SMALL CAPITAL A]
73        '\u{1E00}' | // Ḁ  [LATIN CAPITAL LETTER A WITH RING BELOW]
74        '\u{1EA0}' | // Ạ  [LATIN CAPITAL LETTER A WITH DOT BELOW]
75        '\u{1EA2}' | // Ả  [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
76        '\u{1EA4}' | // Ấ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
77        '\u{1EA6}' | // Ầ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
78        '\u{1EA8}' | // Ẩ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
79        '\u{1EAA}' | // Ẫ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
80        '\u{1EAC}' | // Ậ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
81        '\u{1EAE}' | // Ắ  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
82        '\u{1EB0}' | // Ằ  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
83        '\u{1EB2}' | // Ẳ  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
84        '\u{1EB4}' | // Ẵ  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
85        '\u{1EB6}' | // Ặ  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
86        '\u{24B6}' | // Ⓐ  [CIRCLED LATIN CAPITAL LETTER A]
87        '\u{FF21}'  // A  [FULLWIDTH LATIN CAPITAL LETTER A]
88        => Some("A"),
89        '\u{00E0}' | // à  [LATIN SMALL LETTER A WITH GRAVE]
90        '\u{00E1}' | // á  [LATIN SMALL LETTER A WITH ACUTE]
91        '\u{00E2}' | // â  [LATIN SMALL LETTER A WITH CIRCUMFLEX]
92        '\u{00E3}' | // ã  [LATIN SMALL LETTER A WITH TILDE]
93        '\u{00E4}' | // ä  [LATIN SMALL LETTER A WITH DIAERESIS]
94        '\u{00E5}' | // å  [LATIN SMALL LETTER A WITH RING ABOVE]
95        '\u{0101}' | // ā  [LATIN SMALL LETTER A WITH MACRON]
96        '\u{0103}' | // ă  [LATIN SMALL LETTER A WITH BREVE]
97        '\u{0105}' | // ą  [LATIN SMALL LETTER A WITH OGONEK]
98        '\u{01CE}' | // ǎ  [LATIN SMALL LETTER A WITH CARON]
99        '\u{01DF}' | // ǟ  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
100        '\u{01E1}' | // ǡ  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
101        '\u{01FB}' | // ǻ  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
102        '\u{0201}' | // ȁ  [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
103        '\u{0203}' | // ȃ  [LATIN SMALL LETTER A WITH INVERTED BREVE]
104        '\u{0227}' | // ȧ  [LATIN SMALL LETTER A WITH DOT ABOVE]
105        '\u{0250}' | // ɐ  [LATIN SMALL LETTER TURNED A]
106        '\u{0259}' | // ə  [LATIN SMALL LETTER SCHWA]
107        '\u{025A}' | // ɚ  [LATIN SMALL LETTER SCHWA WITH HOOK]
108        '\u{1D8F}' | // ᶏ  [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
109        '\u{1D95}' | // ᶕ  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
110        '\u{1E01}' | // ạ  [LATIN SMALL LETTER A WITH RING BELOW]
111        '\u{1E9A}' | // ả  [LATIN SMALL LETTER A WITH RIGHT HALF RING]
112        '\u{1EA1}' | // ạ  [LATIN SMALL LETTER A WITH DOT BELOW]
113        '\u{1EA3}' | // ả  [LATIN SMALL LETTER A WITH HOOK ABOVE]
114        '\u{1EA5}' | // ấ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
115        '\u{1EA7}' | // ầ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
116        '\u{1EA9}' | // ẩ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
117        '\u{1EAB}' | // ẫ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
118        '\u{1EAD}' | // ậ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
119        '\u{1EAF}' | // ắ  [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
120        '\u{1EB1}' | // ằ  [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
121        '\u{1EB3}' | // ẳ  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
122        '\u{1EB5}' | // ẵ  [LATIN SMALL LETTER A WITH BREVE AND TILDE]
123        '\u{1EB7}' | // ặ  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
124        '\u{2090}' | // ₐ  [LATIN SUBSCRIPT SMALL LETTER A]
125        '\u{2094}' | // ₔ  [LATIN SUBSCRIPT SMALL LETTER SCHWA]
126        '\u{24D0}' | // ⓐ  [CIRCLED LATIN SMALL LETTER A]
127        '\u{2C65}' | // ⱥ  [LATIN SMALL LETTER A WITH STROKE]
128        '\u{2C6F}' | // Ɐ  [LATIN CAPITAL LETTER TURNED A]
129        '\u{FF41}'  // a  [FULLWIDTH LATIN SMALL LETTER A]
130        => Some("a"),
131        '\u{A732}'  // Ꜳ  [LATIN CAPITAL LETTER AA]
132        => Some("AA"),
133        '\u{00C6}' | // Æ  [LATIN CAPITAL LETTER AE]
134        '\u{01E2}' | // Ǣ  [LATIN CAPITAL LETTER AE WITH MACRON]
135        '\u{01FC}' | // Ǽ  [LATIN CAPITAL LETTER AE WITH ACUTE]
136        '\u{1D01}' // ᴁ  [LATIN LETTER SMALL CAPITAL AE]
137        => Some("AE"),
138        '\u{A734}' // Ꜵ  [LATIN CAPITAL LETTER AO]
139        => Some("AO"),
140        '\u{A736}'  // Ꜷ  [LATIN CAPITAL LETTER AU]
141        => Some("AU"),
142        '\u{A738}' | // Ꜹ  [LATIN CAPITAL LETTER AV]
143        '\u{A73A}'  // Ꜻ  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
144        => Some("AV"),
145        '\u{A73C}'  // Ꜽ  [LATIN CAPITAL LETTER AY]
146        => Some("AY"),
147        '\u{249C}'  // ⒜  [PARENTHESIZED LATIN SMALL LETTER A]
148        => Some("(a)"),
149        '\u{A733}' // ꜳ  [LATIN SMALL LETTER AA]
150        => Some("aa"),
151        '\u{00E6}' | // æ  [LATIN SMALL LETTER AE]
152        '\u{01E3}' | // ǣ  [LATIN SMALL LETTER AE WITH MACRON]
153        '\u{01FD}' | // ǽ  [LATIN SMALL LETTER AE WITH ACUTE]
154        '\u{1D02}' // ᴂ  [LATIN SMALL LETTER TURNED AE]
155        => Some("ae"),
156        '\u{A735}' // ꜵ  [LATIN SMALL LETTER AO]
157        => Some("ao"),
158        '\u{A737}' // ꜷ  [LATIN SMALL LETTER AU]
159        => Some("au"),
160        '\u{A739}' | // ꜹ  [LATIN SMALL LETTER AV]
161        '\u{A73B}' // ꜻ  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
162        => Some("av"),
163        '\u{A73D}' // ꜽ  [LATIN SMALL LETTER AY]
164        => Some("ay"),
165        '\u{0181}' | // Ɓ  [LATIN CAPITAL LETTER B WITH HOOK]
166        '\u{0182}' | // Ƃ  [LATIN CAPITAL LETTER B WITH TOPBAR]
167        '\u{0243}' | // Ƀ  [LATIN CAPITAL LETTER B WITH STROKE]
168        '\u{0299}' | // ʙ  [LATIN LETTER SMALL CAPITAL B]
169        '\u{1D03}' | // ᴃ  [LATIN LETTER SMALL CAPITAL BARRED B]
170        '\u{1E02}' | // Ḃ  [LATIN CAPITAL LETTER B WITH DOT ABOVE]
171        '\u{1E04}' | // Ḅ  [LATIN CAPITAL LETTER B WITH DOT BELOW]
172        '\u{1E06}' | // Ḇ  [LATIN CAPITAL LETTER B WITH LINE BELOW]
173        '\u{24B7}' | // Ⓑ  [CIRCLED LATIN CAPITAL LETTER B]
174        '\u{FF22}' // B  [FULLWIDTH LATIN CAPITAL LETTER B]
175        => Some("B"),
176        '\u{0180}' | // ƀ  [LATIN SMALL LETTER B WITH STROKE]
177        '\u{0183}' | // ƃ  [LATIN SMALL LETTER B WITH TOPBAR]
178        '\u{0253}' | // ɓ  [LATIN SMALL LETTER B WITH HOOK]
179        '\u{1D6C}' | // ᵬ  [LATIN SMALL LETTER B WITH MIDDLE TILDE]
180        '\u{1D80}' | // ᶀ  [LATIN SMALL LETTER B WITH PALATAL HOOK]
181        '\u{1E03}' | // ḃ  [LATIN SMALL LETTER B WITH DOT ABOVE]
182        '\u{1E05}' | // ḅ  [LATIN SMALL LETTER B WITH DOT BELOW]
183        '\u{1E07}' | // ḇ  [LATIN SMALL LETTER B WITH LINE BELOW]
184        '\u{24D1}' | // ⓑ  [CIRCLED LATIN SMALL LETTER B]
185        '\u{FF42}' // b  [FULLWIDTH LATIN SMALL LETTER B]
186        => Some("b"),
187        '\u{249D}' // ⒝  [PARENTHESIZED LATIN SMALL LETTER B]
188        => Some("(b)"),
189        '\u{00C7}' | // Ç  [LATIN CAPITAL LETTER C WITH CEDILLA]
190        '\u{0106}' | // Ć  [LATIN CAPITAL LETTER C WITH ACUTE]
191        '\u{0108}' | // Ĉ  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
192        '\u{010A}' | // Ċ  [LATIN CAPITAL LETTER C WITH DOT ABOVE]
193        '\u{010C}' | // Č  [LATIN CAPITAL LETTER C WITH CARON]
194        '\u{0187}' | // Ƈ  [LATIN CAPITAL LETTER C WITH HOOK]
195        '\u{023B}' | // Ȼ  [LATIN CAPITAL LETTER C WITH STROKE]
196        '\u{0297}' | // ʗ  [LATIN LETTER STRETCHED C]
197        '\u{1D04}' | // ᴄ  [LATIN LETTER SMALL CAPITAL C]
198        '\u{1E08}' | // Ḉ  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
199        '\u{24B8}' | // Ⓒ  [CIRCLED LATIN CAPITAL LETTER C]
200        '\u{FF23}' // C  [FULLWIDTH LATIN CAPITAL LETTER C]
201        => Some("C"),
202        '\u{00E7}' | // ç  [LATIN SMALL LETTER C WITH CEDILLA]
203        '\u{0107}' | // ć  [LATIN SMALL LETTER C WITH ACUTE]
204        '\u{0109}' | // ĉ  [LATIN SMALL LETTER C WITH CIRCUMFLEX]
205        '\u{010B}' | // ċ  [LATIN SMALL LETTER C WITH DOT ABOVE]
206        '\u{010D}' | // č  [LATIN SMALL LETTER C WITH CARON]
207        '\u{0188}' | // ƈ  [LATIN SMALL LETTER C WITH HOOK]
208        '\u{023C}' | // ȼ  [LATIN SMALL LETTER C WITH STROKE]
209        '\u{0255}' | // ɕ  [LATIN SMALL LETTER C WITH CURL]
210        '\u{1E09}' | // ḉ  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
211        '\u{2184}' | // ↄ  [LATIN SMALL LETTER REVERSED C]
212        '\u{24D2}' | // ⓒ  [CIRCLED LATIN SMALL LETTER C]
213        '\u{A73E}' | // Ꜿ  [LATIN CAPITAL LETTER REVERSED C WITH DOT]
214        '\u{A73F}' | // ꜿ  [LATIN SMALL LETTER REVERSED C WITH DOT]
215        '\u{FF43}' // c  [FULLWIDTH LATIN SMALL LETTER C]
216        => Some("c"),
217        '\u{249E}' // ⒞  [PARENTHESIZED LATIN SMALL LETTER C]
218        => Some("(c)"),
219        '\u{00D0}' | // Ð  [LATIN CAPITAL LETTER ETH]
220        '\u{010E}' | // Ď  [LATIN CAPITAL LETTER D WITH CARON]
221        '\u{0110}' | // Đ  [LATIN CAPITAL LETTER D WITH STROKE]
222        '\u{0189}' | // Ɖ  [LATIN CAPITAL LETTER AFRICAN D]
223        '\u{018A}' | // Ɗ  [LATIN CAPITAL LETTER D WITH HOOK]
224        '\u{018B}' | // Ƌ  [LATIN CAPITAL LETTER D WITH TOPBAR]
225        '\u{1D05}' | // ᴅ  [LATIN LETTER SMALL CAPITAL D]
226        '\u{1D06}' | // ᴆ  [LATIN LETTER SMALL CAPITAL ETH]
227        '\u{1E0A}' | // Ḋ  [LATIN CAPITAL LETTER D WITH DOT ABOVE]
228        '\u{1E0C}' | // Ḍ  [LATIN CAPITAL LETTER D WITH DOT BELOW]
229        '\u{1E0E}' | // Ḏ  [LATIN CAPITAL LETTER D WITH LINE BELOW]
230        '\u{1E10}' | // Ḑ  [LATIN CAPITAL LETTER D WITH CEDILLA]
231        '\u{1E12}' | // Ḓ  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
232        '\u{24B9}' | // Ⓓ  [CIRCLED LATIN CAPITAL LETTER D]
233        '\u{A779}' | // Ꝺ  [LATIN CAPITAL LETTER INSULAR D]
234        '\u{FF24}' // D  [FULLWIDTH LATIN CAPITAL LETTER D]
235        => Some("D"),
236        '\u{00F0}' | // ð  [LATIN SMALL LETTER ETH]
237        '\u{010F}' | // ď  [LATIN SMALL LETTER D WITH CARON]
238        '\u{0111}' | // đ  [LATIN SMALL LETTER D WITH STROKE]
239        '\u{018C}' | // ƌ  [LATIN SMALL LETTER D WITH TOPBAR]
240        '\u{0221}' | // ȡ  [LATIN SMALL LETTER D WITH CURL]
241        '\u{0256}' | // ɖ  [LATIN SMALL LETTER D WITH TAIL]
242        '\u{0257}' | // ɗ  [LATIN SMALL LETTER D WITH HOOK]
243        '\u{1D6D}' | // ᵭ  [LATIN SMALL LETTER D WITH MIDDLE TILDE]
244        '\u{1D81}' | // ᶁ  [LATIN SMALL LETTER D WITH PALATAL HOOK]
245        '\u{1D91}' | // ᶑ  [LATIN SMALL LETTER D WITH HOOK AND TAIL]
246        '\u{1E0B}' | // ḋ  [LATIN SMALL LETTER D WITH DOT ABOVE]
247        '\u{1E0D}' | // ḍ  [LATIN SMALL LETTER D WITH DOT BELOW]
248        '\u{1E0F}' | // ḏ  [LATIN SMALL LETTER D WITH LINE BELOW]
249        '\u{1E11}' | // ḑ  [LATIN SMALL LETTER D WITH CEDILLA]
250        '\u{1E13}' | // ḓ  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
251        '\u{24D3}' | // ⓓ  [CIRCLED LATIN SMALL LETTER D]
252        '\u{A77A}' | // ꝺ  [LATIN SMALL LETTER INSULAR D]
253        '\u{FF44}' // d  [FULLWIDTH LATIN SMALL LETTER D]
254        => Some("d"),
255        '\u{01C4}' | // DŽ  [LATIN CAPITAL LETTER DZ WITH CARON]
256        '\u{01F1}' // DZ  [LATIN CAPITAL LETTER DZ]
257        => Some("DZ"),
258        '\u{01C5}' | // Dž  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
259        '\u{01F2}' // Dz  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
260        => Some("Dz"),
261        '\u{249F}' // ⒟  [PARENTHESIZED LATIN SMALL LETTER D]
262        => Some("(d)"),
263        '\u{0238}' // ȸ  [LATIN SMALL LETTER DB DIGRAPH]
264        => Some("db"),
265        '\u{01C6}' | // dž  [LATIN SMALL LETTER DZ WITH CARON]
266        '\u{01F3}' | // dz  [LATIN SMALL LETTER DZ]
267        '\u{02A3}' | // ʣ  [LATIN SMALL LETTER DZ DIGRAPH]
268        '\u{02A5}' // ʥ  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
269        => Some("dz"),
270        '\u{00C8}' | // È  [LATIN CAPITAL LETTER E WITH GRAVE]
271        '\u{00C9}' | // É  [LATIN CAPITAL LETTER E WITH ACUTE]
272        '\u{00CA}' | // Ê  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
273        '\u{00CB}' | // Ë  [LATIN CAPITAL LETTER E WITH DIAERESIS]
274        '\u{0112}' | // Ē  [LATIN CAPITAL LETTER E WITH MACRON]
275        '\u{0114}' | // Ĕ  [LATIN CAPITAL LETTER E WITH BREVE]
276        '\u{0116}' | // Ė  [LATIN CAPITAL LETTER E WITH DOT ABOVE]
277        '\u{0118}' | // Ę  [LATIN CAPITAL LETTER E WITH OGONEK]
278        '\u{011A}' | // Ě  [LATIN CAPITAL LETTER E WITH CARON]
279        '\u{018E}' | // Ǝ  [LATIN CAPITAL LETTER REVERSED E]
280        '\u{0190}' | // Ɛ  [LATIN CAPITAL LETTER OPEN E]
281        '\u{0204}' | // Ȅ  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
282        '\u{0206}' | // Ȇ  [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
283        '\u{0228}' | // Ȩ  [LATIN CAPITAL LETTER E WITH CEDILLA]
284        '\u{0246}' | // Ɇ  [LATIN CAPITAL LETTER E WITH STROKE]
285        '\u{1D07}' | // ᴇ  [LATIN LETTER SMALL CAPITAL E]
286        '\u{1E14}' | // Ḕ  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
287        '\u{1E16}' | // Ḗ  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
288        '\u{1E18}' | // Ḙ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
289        '\u{1E1A}' | // Ḛ  [LATIN CAPITAL LETTER E WITH TILDE BELOW]
290        '\u{1E1C}' | // Ḝ  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
291        '\u{1EB8}' | // Ẹ  [LATIN CAPITAL LETTER E WITH DOT BELOW]
292        '\u{1EBA}' | // Ẻ  [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
293        '\u{1EBC}' | // Ẽ  [LATIN CAPITAL LETTER E WITH TILDE]
294        '\u{1EBE}' | // Ế  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
295        '\u{1EC0}' | // Ề  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
296        '\u{1EC2}' | // Ể  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
297        '\u{1EC4}' | // Ễ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
298        '\u{1EC6}' | // Ệ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
299        '\u{24BA}' | // Ⓔ  [CIRCLED LATIN CAPITAL LETTER E]
300        '\u{2C7B}' | // ⱻ  [LATIN LETTER SMALL CAPITAL TURNED E]
301        '\u{FF25}' // E  [FULLWIDTH LATIN CAPITAL LETTER E]
302        => Some("E"),
303        '\u{00E8}' | // è  [LATIN SMALL LETTER E WITH GRAVE]
304        '\u{00E9}' | // é  [LATIN SMALL LETTER E WITH ACUTE]
305        '\u{00EA}' | // ê  [LATIN SMALL LETTER E WITH CIRCUMFLEX]
306        '\u{00EB}' | // ë  [LATIN SMALL LETTER E WITH DIAERESIS]
307        '\u{0113}' | // ē  [LATIN SMALL LETTER E WITH MACRON]
308        '\u{0115}' | // ĕ  [LATIN SMALL LETTER E WITH BREVE]
309        '\u{0117}' | // ė  [LATIN SMALL LETTER E WITH DOT ABOVE]
310        '\u{0119}' | // ę  [LATIN SMALL LETTER E WITH OGONEK]
311        '\u{011B}' | // ě  [LATIN SMALL LETTER E WITH CARON]
312        '\u{01DD}' | // ǝ  [LATIN SMALL LETTER TURNED E]
313        '\u{0205}' | // ȅ  [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
314        '\u{0207}' | // ȇ  [LATIN SMALL LETTER E WITH INVERTED BREVE]
315        '\u{0229}' | // ȩ  [LATIN SMALL LETTER E WITH CEDILLA]
316        '\u{0247}' | // ɇ  [LATIN SMALL LETTER E WITH STROKE]
317        '\u{0258}' | // ɘ  [LATIN SMALL LETTER REVERSED E]
318        '\u{025B}' | // ɛ  [LATIN SMALL LETTER OPEN E]
319        '\u{025C}' | // ɜ  [LATIN SMALL LETTER REVERSED OPEN E]
320        '\u{025D}' | // ɝ  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
321        '\u{025E}' | // ɞ  [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
322        '\u{029A}' | // ʚ  [LATIN SMALL LETTER CLOSED OPEN E]
323        '\u{1D08}' | // ᴈ  [LATIN SMALL LETTER TURNED OPEN E]
324        '\u{1D92}' | // ᶒ  [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
325        '\u{1D93}' | // ᶓ  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
326        '\u{1D94}' | // ᶔ  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
327        '\u{1E15}' | // ḕ  [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
328        '\u{1E17}' | // ḗ  [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
329        '\u{1E19}' | // ḙ  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
330        '\u{1E1B}' | // ḛ  [LATIN SMALL LETTER E WITH TILDE BELOW]
331        '\u{1E1D}' | // ḝ  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
332        '\u{1EB9}' | // ẹ  [LATIN SMALL LETTER E WITH DOT BELOW]
333        '\u{1EBB}' | // ẻ  [LATIN SMALL LETTER E WITH HOOK ABOVE]
334        '\u{1EBD}' | // ẽ  [LATIN SMALL LETTER E WITH TILDE]
335        '\u{1EBF}' | // ế  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
336        '\u{1EC1}' | // ề  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
337        '\u{1EC3}' | // ể  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
338        '\u{1EC5}' | // ễ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
339        '\u{1EC7}' | // ệ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
340        '\u{2091}' | // ₑ  [LATIN SUBSCRIPT SMALL LETTER E]
341        '\u{24D4}' | // ⓔ  [CIRCLED LATIN SMALL LETTER E]
342        '\u{2C78}' | // ⱸ  [LATIN SMALL LETTER E WITH NOTCH]
343        '\u{FF45}' // e  [FULLWIDTH LATIN SMALL LETTER E]
344        => Some("e"),
345        '\u{24A0}' // ⒠  [PARENTHESIZED LATIN SMALL LETTER E]
346        => Some("(e)"),
347        '\u{0191}' | // Ƒ  [LATIN CAPITAL LETTER F WITH HOOK]
348        '\u{1E1E}' | // Ḟ  [LATIN CAPITAL LETTER F WITH DOT ABOVE]
349        '\u{24BB}' | // Ⓕ  [CIRCLED LATIN CAPITAL LETTER F]
350        '\u{A730}' | // ꜰ  [LATIN LETTER SMALL CAPITAL F]
351        '\u{A77B}' | // Ꝼ  [LATIN CAPITAL LETTER INSULAR F]
352        '\u{A7FB}' | // ꟻ  [LATIN EPIGRAPHIC LETTER REVERSED F]
353        '\u{FF26}' // F  [FULLWIDTH LATIN CAPITAL LETTER F]
354        => Some("F"),
355        '\u{0192}' | // ƒ  [LATIN SMALL LETTER F WITH HOOK]
356        '\u{1D6E}' | // ᵮ  [LATIN SMALL LETTER F WITH MIDDLE TILDE]
357        '\u{1D82}' | // ᶂ  [LATIN SMALL LETTER F WITH PALATAL HOOK]
358        '\u{1E1F}' | // ḟ  [LATIN SMALL LETTER F WITH DOT ABOVE]
359        '\u{1E9B}' | // ẛ  [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
360        '\u{24D5}' | // ⓕ  [CIRCLED LATIN SMALL LETTER F]
361        '\u{A77C}' | // ꝼ  [LATIN SMALL LETTER INSULAR F]
362        '\u{FF46}' // f  [FULLWIDTH LATIN SMALL LETTER F]
363        => Some("f"),
364        '\u{24A1}' // ⒡  [PARENTHESIZED LATIN SMALL LETTER F]
365        => Some("(f)"),
366        '\u{FB00}' // ff  [LATIN SMALL LIGATURE FF]
367        => Some("ff"),
368        '\u{FB03}' // ffi  [LATIN SMALL LIGATURE FFI]
369        => Some("ffi"),
370        '\u{FB04}' // ffl  [LATIN SMALL LIGATURE FFL]
371        => Some("ffl"),
372        '\u{FB01}' // fi  [LATIN SMALL LIGATURE FI]
373        => Some("fi"),
374        '\u{FB02}' // fl  [LATIN SMALL LIGATURE FL]
375        => Some("fl"),
376        '\u{011C}' | // Ĝ  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
377        '\u{011E}' | // Ğ  [LATIN CAPITAL LETTER G WITH BREVE]
378        '\u{0120}' | // Ġ  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
379        '\u{0122}' | // Ģ  [LATIN CAPITAL LETTER G WITH CEDILLA]
380        '\u{0193}' | // Ɠ  [LATIN CAPITAL LETTER G WITH HOOK]
381        '\u{01E4}' | // Ǥ  [LATIN CAPITAL LETTER G WITH STROKE]
382        '\u{01E5}' | // ǥ  [LATIN SMALL LETTER G WITH STROKE]
383        '\u{01E6}' | // Ǧ  [LATIN CAPITAL LETTER G WITH CARON]
384        '\u{01E7}' | // ǧ  [LATIN SMALL LETTER G WITH CARON]
385        '\u{01F4}' | // Ǵ  [LATIN CAPITAL LETTER G WITH ACUTE]
386        '\u{0262}' | // ɢ  [LATIN LETTER SMALL CAPITAL G]
387        '\u{029B}' | // ʛ  [LATIN LETTER SMALL CAPITAL G WITH HOOK]
388        '\u{1E20}' | // Ḡ  [LATIN CAPITAL LETTER G WITH MACRON]
389        '\u{24BC}' | // Ⓖ  [CIRCLED LATIN CAPITAL LETTER G]
390        '\u{A77D}' | // Ᵹ  [LATIN CAPITAL LETTER INSULAR G]
391        '\u{A77E}' | // Ꝿ  [LATIN CAPITAL LETTER TURNED INSULAR G]
392        '\u{FF27}' // G  [FULLWIDTH LATIN CAPITAL LETTER G]
393        => Some("G"),
394        '\u{011D}' | // ĝ  [LATIN SMALL LETTER G WITH CIRCUMFLEX]
395        '\u{011F}' | // ğ  [LATIN SMALL LETTER G WITH BREVE]
396        '\u{0121}' | // ġ  [LATIN SMALL LETTER G WITH DOT ABOVE]
397        '\u{0123}' | // ģ  [LATIN SMALL LETTER G WITH CEDILLA]
398        '\u{01F5}' | // ǵ  [LATIN SMALL LETTER G WITH ACUTE]
399        '\u{0260}' | // ɠ  [LATIN SMALL LETTER G WITH HOOK]
400        '\u{0261}' | // ɡ  [LATIN SMALL LETTER SCRIPT G]
401        '\u{1D77}' | // ᵷ  [LATIN SMALL LETTER TURNED G]
402        '\u{1D79}' | // ᵹ  [LATIN SMALL LETTER INSULAR G]
403        '\u{1D83}' | // ᶃ  [LATIN SMALL LETTER G WITH PALATAL HOOK]
404        '\u{1E21}' | // ḡ  [LATIN SMALL LETTER G WITH MACRON]
405        '\u{24D6}' | // ⓖ  [CIRCLED LATIN SMALL LETTER G]
406        '\u{A77F}' | // ꝿ  [LATIN SMALL LETTER TURNED INSULAR G]
407        '\u{FF47}' // g  [FULLWIDTH LATIN SMALL LETTER G]
408        => Some("g"),
409        '\u{24A2}' // ⒢  [PARENTHESIZED LATIN SMALL LETTER G]
410        => Some("(g)"),
411        '\u{0124}' | // Ĥ  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
412        '\u{0126}' | // Ħ  [LATIN CAPITAL LETTER H WITH STROKE]
413        '\u{021E}' | // Ȟ  [LATIN CAPITAL LETTER H WITH CARON]
414        '\u{029C}' | // ʜ  [LATIN LETTER SMALL CAPITAL H]
415        '\u{1E22}' | // Ḣ  [LATIN CAPITAL LETTER H WITH DOT ABOVE]
416        '\u{1E24}' | // Ḥ  [LATIN CAPITAL LETTER H WITH DOT BELOW]
417        '\u{1E26}' | // Ḧ  [LATIN CAPITAL LETTER H WITH DIAERESIS]
418        '\u{1E28}' | // Ḩ  [LATIN CAPITAL LETTER H WITH CEDILLA]
419        '\u{1E2A}' | // Ḫ  [LATIN CAPITAL LETTER H WITH BREVE BELOW]
420        '\u{24BD}' | // Ⓗ  [CIRCLED LATIN CAPITAL LETTER H]
421        '\u{2C67}' | // Ⱨ  [LATIN CAPITAL LETTER H WITH DESCENDER]
422        '\u{2C75}' | // Ⱶ  [LATIN CAPITAL LETTER HALF H]
423        '\u{FF28}' // H  [FULLWIDTH LATIN CAPITAL LETTER H]
424        => Some("H"),
425        '\u{0125}' | // ĥ  [LATIN SMALL LETTER H WITH CIRCUMFLEX]
426        '\u{0127}' | // ħ  [LATIN SMALL LETTER H WITH STROKE]
427        '\u{021F}' | // ȟ  [LATIN SMALL LETTER H WITH CARON]
428        '\u{0265}' | // ɥ  [LATIN SMALL LETTER TURNED H]
429        '\u{0266}' | // ɦ  [LATIN SMALL LETTER H WITH HOOK]
430        '\u{02AE}' | // ʮ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
431        '\u{02AF}' | // ʯ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
432        '\u{1E23}' | // ḣ  [LATIN SMALL LETTER H WITH DOT ABOVE]
433        '\u{1E25}' | // ḥ  [LATIN SMALL LETTER H WITH DOT BELOW]
434        '\u{1E27}' | // ḧ  [LATIN SMALL LETTER H WITH DIAERESIS]
435        '\u{1E29}' | // ḩ  [LATIN SMALL LETTER H WITH CEDILLA]
436        '\u{1E2B}' | // ḫ  [LATIN SMALL LETTER H WITH BREVE BELOW]
437        '\u{1E96}' | // ẖ  [LATIN SMALL LETTER H WITH LINE BELOW]
438        '\u{24D7}' | // ⓗ  [CIRCLED LATIN SMALL LETTER H]
439        '\u{2C68}' | // ⱨ  [LATIN SMALL LETTER H WITH DESCENDER]
440        '\u{2C76}' | // ⱶ  [LATIN SMALL LETTER HALF H]
441        '\u{FF48}' // h  [FULLWIDTH LATIN SMALL LETTER H]
442        => Some("h"),
443        '\u{01F6}' // Ƕ  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR]
444        => Some("HV"),
445        '\u{24A3}' // ⒣  [PARENTHESIZED LATIN SMALL LETTER H]
446        => Some("(h)"),
447        '\u{0195}' // ƕ  [LATIN SMALL LETTER HV]
448        => Some("hv"),
449        '\u{00CC}' | // Ì  [LATIN CAPITAL LETTER I WITH GRAVE]
450        '\u{00CD}' | // Í  [LATIN CAPITAL LETTER I WITH ACUTE]
451        '\u{00CE}' | // Î  [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
452        '\u{00CF}' | // Ï  [LATIN CAPITAL LETTER I WITH DIAERESIS]
453        '\u{0128}' | // Ĩ  [LATIN CAPITAL LETTER I WITH TILDE]
454        '\u{012A}' | // Ī  [LATIN CAPITAL LETTER I WITH MACRON]
455        '\u{012C}' | // Ĭ  [LATIN CAPITAL LETTER I WITH BREVE]
456        '\u{012E}' | // Į  [LATIN CAPITAL LETTER I WITH OGONEK]
457        '\u{0130}' | // İ  [LATIN CAPITAL LETTER I WITH DOT ABOVE]
458        '\u{0196}' | // Ɩ  [LATIN CAPITAL LETTER IOTA]
459        '\u{0197}' | // Ɨ  [LATIN CAPITAL LETTER I WITH STROKE]
460        '\u{01CF}' | // Ǐ  [LATIN CAPITAL LETTER I WITH CARON]
461        '\u{0208}' | // Ȉ  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
462        '\u{020A}' | // Ȋ  [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
463        '\u{026A}' | // ɪ  [LATIN LETTER SMALL CAPITAL I]
464        '\u{1D7B}' | // ᵻ  [LATIN SMALL CAPITAL LETTER I WITH STROKE]
465        '\u{1E2C}' | // Ḭ  [LATIN CAPITAL LETTER I WITH TILDE BELOW]
466        '\u{1E2E}' | // Ḯ  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
467        '\u{1EC8}' | // Ỉ  [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
468        '\u{1ECA}' | // Ị  [LATIN CAPITAL LETTER I WITH DOT BELOW]
469        '\u{24BE}' | // Ⓘ  [CIRCLED LATIN CAPITAL LETTER I]
470        '\u{A7FE}' | // ꟾ  [LATIN EPIGRAPHIC LETTER I LONGA]
471        '\u{FF29}' // I  [FULLWIDTH LATIN CAPITAL LETTER I]
472        => Some("I"),
473        '\u{00EC}' | // ì  [LATIN SMALL LETTER I WITH GRAVE]
474        '\u{00ED}' | // í  [LATIN SMALL LETTER I WITH ACUTE]
475        '\u{00EE}' | // î  [LATIN SMALL LETTER I WITH CIRCUMFLEX]
476        '\u{00EF}' | // ï  [LATIN SMALL LETTER I WITH DIAERESIS]
477        '\u{0129}' | // ĩ  [LATIN SMALL LETTER I WITH TILDE]
478        '\u{012B}' | // ī  [LATIN SMALL LETTER I WITH MACRON]
479        '\u{012D}' | // ĭ  [LATIN SMALL LETTER I WITH BREVE]
480        '\u{012F}' | // į  [LATIN SMALL LETTER I WITH OGONEK]
481        '\u{0131}' | // ı  [LATIN SMALL LETTER DOTLESS I]
482        '\u{01D0}' | // ǐ  [LATIN SMALL LETTER I WITH CARON]
483        '\u{0209}' | // ȉ  [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
484        '\u{020B}' | // ȋ  [LATIN SMALL LETTER I WITH INVERTED BREVE]
485        '\u{0268}' | // ɨ  [LATIN SMALL LETTER I WITH STROKE]
486        '\u{1D09}' | // ᴉ  [LATIN SMALL LETTER TURNED I]
487        '\u{1D62}' | // ᵢ  [LATIN SUBSCRIPT SMALL LETTER I]
488        '\u{1D7C}' | // ᵼ  [LATIN SMALL LETTER IOTA WITH STROKE]
489        '\u{1D96}' | // ᶖ  [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
490        '\u{1E2D}' | // ḭ  [LATIN SMALL LETTER I WITH TILDE BELOW]
491        '\u{1E2F}' | // ḯ  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
492        '\u{1EC9}' | // ỉ  [LATIN SMALL LETTER I WITH HOOK ABOVE]
493        '\u{1ECB}' | // ị  [LATIN SMALL LETTER I WITH DOT BELOW]
494        '\u{2071}' | // ⁱ  [SUPERSCRIPT LATIN SMALL LETTER I]
495        '\u{24D8}' | // ⓘ  [CIRCLED LATIN SMALL LETTER I]
496        '\u{FF49}' // i  [FULLWIDTH LATIN SMALL LETTER I]
497        => Some("i"),
498        '\u{0132}' // IJ  [LATIN CAPITAL LIGATURE IJ]
499        => Some("IJ"),
500        '\u{24A4}' // ⒤  [PARENTHESIZED LATIN SMALL LETTER I]
501        => Some("(i)"),
502        '\u{0133}' // ij  [LATIN SMALL LIGATURE IJ]
503        => Some("ij"),
504        '\u{0134}' | // Ĵ  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
505        '\u{0248}' | // Ɉ  [LATIN CAPITAL LETTER J WITH STROKE]
506        '\u{1D0A}' | // ᴊ  [LATIN LETTER SMALL CAPITAL J]
507        '\u{24BF}' | // Ⓙ  [CIRCLED LATIN CAPITAL LETTER J]
508        '\u{FF2A}' // J  [FULLWIDTH LATIN CAPITAL LETTER J]
509        => Some("J"),
510        '\u{0135}' | // ĵ  [LATIN SMALL LETTER J WITH CIRCUMFLEX]
511        '\u{01F0}' | // ǰ  [LATIN SMALL LETTER J WITH CARON]
512        '\u{0237}' | // ȷ  [LATIN SMALL LETTER DOTLESS J]
513        '\u{0249}' | // ɉ  [LATIN SMALL LETTER J WITH STROKE]
514        '\u{025F}' | // ɟ  [LATIN SMALL LETTER DOTLESS J WITH STROKE]
515        '\u{0284}' | // ʄ  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
516        '\u{029D}' | // ʝ  [LATIN SMALL LETTER J WITH CROSSED-TAIL]
517        '\u{24D9}' | // ⓙ  [CIRCLED LATIN SMALL LETTER J]
518        '\u{2C7C}' | // ⱼ  [LATIN SUBSCRIPT SMALL LETTER J]
519        '\u{FF4A}' // j  [FULLWIDTH LATIN SMALL LETTER J]
520        => Some("j"),
521        '\u{24A5}' // ⒥  [PARENTHESIZED LATIN SMALL LETTER J]
522        => Some("(j)"),
523        '\u{0136}' | // Ķ  [LATIN CAPITAL LETTER K WITH CEDILLA]
524        '\u{0198}' | // Ƙ  [LATIN CAPITAL LETTER K WITH HOOK]
525        '\u{01E8}' | // Ǩ  [LATIN CAPITAL LETTER K WITH CARON]
526        '\u{1D0B}' | // ᴋ  [LATIN LETTER SMALL CAPITAL K]
527        '\u{1E30}' | // Ḱ  [LATIN CAPITAL LETTER K WITH ACUTE]
528        '\u{1E32}' | // Ḳ  [LATIN CAPITAL LETTER K WITH DOT BELOW]
529        '\u{1E34}' | // Ḵ  [LATIN CAPITAL LETTER K WITH LINE BELOW]
530        '\u{24C0}' | // Ⓚ  [CIRCLED LATIN CAPITAL LETTER K]
531        '\u{2C69}' | // Ⱪ  [LATIN CAPITAL LETTER K WITH DESCENDER]
532        '\u{A740}' | // Ꝁ  [LATIN CAPITAL LETTER K WITH STROKE]
533        '\u{A742}' | // Ꝃ  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
534        '\u{A744}' | // Ꝅ  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
535        '\u{FF2B}' // K  [FULLWIDTH LATIN CAPITAL LETTER K]
536        => Some("K"),
537        '\u{0137}' | // ķ  [LATIN SMALL LETTER K WITH CEDILLA]
538        '\u{0199}' | // ƙ  [LATIN SMALL LETTER K WITH HOOK]
539        '\u{01E9}' | // ǩ  [LATIN SMALL LETTER K WITH CARON]
540        '\u{029E}' | // ʞ  [LATIN SMALL LETTER TURNED K]
541        '\u{1D84}' | // ᶄ  [LATIN SMALL LETTER K WITH PALATAL HOOK]
542        '\u{1E31}' | // ḱ  [LATIN SMALL LETTER K WITH ACUTE]
543        '\u{1E33}' | // ḳ  [LATIN SMALL LETTER K WITH DOT BELOW]
544        '\u{1E35}' | // ḵ  [LATIN SMALL LETTER K WITH LINE BELOW]
545        '\u{24DA}' | // ⓚ  [CIRCLED LATIN SMALL LETTER K]
546        '\u{2C6A}' | // ⱪ  [LATIN SMALL LETTER K WITH DESCENDER]
547        '\u{A741}' | // ꝁ  [LATIN SMALL LETTER K WITH STROKE]
548        '\u{A743}' | // ꝃ  [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
549        '\u{A745}' | // ꝅ  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
550        '\u{FF4B}' // k  [FULLWIDTH LATIN SMALL LETTER K]
551        => Some("k"),
552        '\u{24A6}' // ⒦  [PARENTHESIZED LATIN SMALL LETTER K]
553        => Some("(k)"),
554        '\u{0139}' | // Ĺ  [LATIN CAPITAL LETTER L WITH ACUTE]
555        '\u{013B}' | // Ļ  [LATIN CAPITAL LETTER L WITH CEDILLA]
556        '\u{013D}' | // Ľ  [LATIN CAPITAL LETTER L WITH CARON]
557        '\u{013F}' | // Ŀ  [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
558        '\u{0141}' | // Ł  [LATIN CAPITAL LETTER L WITH STROKE]
559        '\u{023D}' | // Ƚ  [LATIN CAPITAL LETTER L WITH BAR]
560        '\u{029F}' | // ʟ  [LATIN LETTER SMALL CAPITAL L]
561        '\u{1D0C}' | // ᴌ  [LATIN LETTER SMALL CAPITAL L WITH STROKE]
562        '\u{1E36}' | // Ḷ  [LATIN CAPITAL LETTER L WITH DOT BELOW]
563        '\u{1E38}' | // Ḹ  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
564        '\u{1E3A}' | // Ḻ  [LATIN CAPITAL LETTER L WITH LINE BELOW]
565        '\u{1E3C}' | // Ḽ  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
566        '\u{24C1}' | // Ⓛ  [CIRCLED LATIN CAPITAL LETTER L]
567        '\u{2C60}' | // Ⱡ  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
568        '\u{2C62}' | // Ɫ  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
569        '\u{A746}' | // Ꝇ  [LATIN CAPITAL LETTER BROKEN L]
570        '\u{A748}' | // Ꝉ  [LATIN CAPITAL LETTER L WITH HIGH STROKE]
571        '\u{A780}' | // Ꞁ  [LATIN CAPITAL LETTER TURNED L]
572        '\u{FF2C}' // L  [FULLWIDTH LATIN CAPITAL LETTER L]
573        => Some("L"),
574        '\u{013A}' | // ĺ  [LATIN SMALL LETTER L WITH ACUTE]
575        '\u{013C}' | // ļ  [LATIN SMALL LETTER L WITH CEDILLA]
576        '\u{013E}' | // ľ  [LATIN SMALL LETTER L WITH CARON]
577        '\u{0140}' | // ŀ  [LATIN SMALL LETTER L WITH MIDDLE DOT]
578        '\u{0142}' | // ł  [LATIN SMALL LETTER L WITH STROKE]
579        '\u{019A}' | // ƚ  [LATIN SMALL LETTER L WITH BAR]
580        '\u{0234}' | // ȴ  [LATIN SMALL LETTER L WITH CURL]
581        '\u{026B}' | // ɫ  [LATIN SMALL LETTER L WITH MIDDLE TILDE]
582        '\u{026C}' | // ɬ  [LATIN SMALL LETTER L WITH BELT]
583        '\u{026D}' | // ɭ  [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
584        '\u{1D85}' | // ᶅ  [LATIN SMALL LETTER L WITH PALATAL HOOK]
585        '\u{1E37}' | // ḷ  [LATIN SMALL LETTER L WITH DOT BELOW]
586        '\u{1E39}' | // ḹ  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
587        '\u{1E3B}' | // ḻ  [LATIN SMALL LETTER L WITH LINE BELOW]
588        '\u{1E3D}' | // ḽ  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
589        '\u{24DB}' | // ⓛ  [CIRCLED LATIN SMALL LETTER L]
590        '\u{2C61}' | // ⱡ  [LATIN SMALL LETTER L WITH DOUBLE BAR]
591        '\u{A747}' | // ꝇ  [LATIN SMALL LETTER BROKEN L]
592        '\u{A749}' | // ꝉ  [LATIN SMALL LETTER L WITH HIGH STROKE]
593        '\u{A781}' | // ꞁ  [LATIN SMALL LETTER TURNED L]
594        '\u{FF4C}' // l  [FULLWIDTH LATIN SMALL LETTER L]
595        => Some("l"),
596        '\u{01C7}' // LJ  [LATIN CAPITAL LETTER LJ]
597        => Some("LJ"),
598        '\u{1EFA}' // Ỻ  [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
599        => Some("LL"),
600        '\u{01C8}' // Lj  [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
601        => Some("Lj"),
602        '\u{24A7}' // ⒧  [PARENTHESIZED LATIN SMALL LETTER L]
603        => Some("(l)"),
604        '\u{01C9}' // lj  [LATIN SMALL LETTER LJ]
605        => Some("lj"),
606        '\u{1EFB}' // ỻ  [LATIN SMALL LETTER MIDDLE-WELSH LL]
607        => Some("ll"),
608        '\u{02AA}' // ʪ  [LATIN SMALL LETTER LS DIGRAPH]
609        => Some("ls"),
610        '\u{02AB}' // ʫ  [LATIN SMALL LETTER LZ DIGRAPH]
611        => Some("lz"),
612        '\u{019C}' | // Ɯ  [LATIN CAPITAL LETTER TURNED M]
613        '\u{1D0D}' | // ᴍ  [LATIN LETTER SMALL CAPITAL M]
614        '\u{1E3E}' | // Ḿ  [LATIN CAPITAL LETTER M WITH ACUTE]
615        '\u{1E40}' | // Ṁ  [LATIN CAPITAL LETTER M WITH DOT ABOVE]
616        '\u{1E42}' | // Ṃ  [LATIN CAPITAL LETTER M WITH DOT BELOW]
617        '\u{24C2}' | // Ⓜ  [CIRCLED LATIN CAPITAL LETTER M]
618        '\u{2C6E}' | // Ɱ  [LATIN CAPITAL LETTER M WITH HOOK]
619        '\u{A7FD}' | // ꟽ  [LATIN EPIGRAPHIC LETTER INVERTED M]
620        '\u{A7FF}' | // ꟿ  [LATIN EPIGRAPHIC LETTER ARCHAIC M]
621        '\u{FF2D}' // M  [FULLWIDTH LATIN CAPITAL LETTER M]
622        => Some("M"),
623        '\u{026F}' | // ɯ  [LATIN SMALL LETTER TURNED M]
624        '\u{0270}' | // ɰ  [LATIN SMALL LETTER TURNED M WITH LONG LEG]
625        '\u{0271}' | // ɱ  [LATIN SMALL LETTER M WITH HOOK]
626        '\u{1D6F}' | // ᵯ  [LATIN SMALL LETTER M WITH MIDDLE TILDE]
627        '\u{1D86}' | // ᶆ  [LATIN SMALL LETTER M WITH PALATAL HOOK]
628        '\u{1E3F}' | // ḿ  [LATIN SMALL LETTER M WITH ACUTE]
629        '\u{1E41}' | // ṁ  [LATIN SMALL LETTER M WITH DOT ABOVE]
630        '\u{1E43}' | // ṃ  [LATIN SMALL LETTER M WITH DOT BELOW]
631        '\u{24DC}' | // ⓜ  [CIRCLED LATIN SMALL LETTER M]
632        '\u{FF4D}' // m  [FULLWIDTH LATIN SMALL LETTER M]
633        => Some("m"),
634        '\u{24A8}' // ⒨  [PARENTHESIZED LATIN SMALL LETTER M]
635        => Some("(m)"),
636        '\u{00D1}' | // Ñ  [LATIN CAPITAL LETTER N WITH TILDE]
637        '\u{0143}' | // Ń  [LATIN CAPITAL LETTER N WITH ACUTE]
638        '\u{0145}' | // Ņ  [LATIN CAPITAL LETTER N WITH CEDILLA]
639        '\u{0147}' | // Ň  [LATIN CAPITAL LETTER N WITH CARON]
640        '\u{014A}' | // Ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG]
641        '\u{019D}' | // Ɲ  [LATIN CAPITAL LETTER N WITH LEFT HOOK]
642        '\u{01F8}' | // Ǹ  [LATIN CAPITAL LETTER N WITH GRAVE]
643        '\u{0220}' | // Ƞ  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
644        '\u{0274}' | // ɴ  [LATIN LETTER SMALL CAPITAL N]
645        '\u{1D0E}' | // ᴎ  [LATIN LETTER SMALL CAPITAL REVERSED N]
646        '\u{1E44}' | // Ṅ  [LATIN CAPITAL LETTER N WITH DOT ABOVE]
647        '\u{1E46}' | // Ṇ  [LATIN CAPITAL LETTER N WITH DOT BELOW]
648        '\u{1E48}' | // Ṉ  [LATIN CAPITAL LETTER N WITH LINE BELOW]
649        '\u{1E4A}' | // Ṋ  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
650        '\u{24C3}' | // Ⓝ  [CIRCLED LATIN CAPITAL LETTER N]
651        '\u{FF2E}' // N  [FULLWIDTH LATIN CAPITAL LETTER N]
652        => Some("N"),
653        '\u{00F1}' | // ñ  [LATIN SMALL LETTER N WITH TILDE]
654        '\u{0144}' | // ń  [LATIN SMALL LETTER N WITH ACUTE]
655        '\u{0146}' | // ņ  [LATIN SMALL LETTER N WITH CEDILLA]
656        '\u{0148}' | // ň  [LATIN SMALL LETTER N WITH CARON]
657        '\u{0149}' | // ʼn  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
658        '\u{014B}' | // ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG]
659        '\u{019E}' | // ƞ  [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
660        '\u{01F9}' | // ǹ  [LATIN SMALL LETTER N WITH GRAVE]
661        '\u{0235}' | // ȵ  [LATIN SMALL LETTER N WITH CURL]
662        '\u{0272}' | // ɲ  [LATIN SMALL LETTER N WITH LEFT HOOK]
663        '\u{0273}' | // ɳ  [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
664        '\u{1D70}' | // ᵰ  [LATIN SMALL LETTER N WITH MIDDLE TILDE]
665        '\u{1D87}' | // ᶇ  [LATIN SMALL LETTER N WITH PALATAL HOOK]
666        '\u{1E45}' | // ṅ  [LATIN SMALL LETTER N WITH DOT ABOVE]
667        '\u{1E47}' | // ṇ  [LATIN SMALL LETTER N WITH DOT BELOW]
668        '\u{1E49}' | // ṉ  [LATIN SMALL LETTER N WITH LINE BELOW]
669        '\u{1E4B}' | // ṋ  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
670        '\u{207F}' | // ⁿ  [SUPERSCRIPT LATIN SMALL LETTER N]
671        '\u{24DD}' | // ⓝ  [CIRCLED LATIN SMALL LETTER N]
672        '\u{FF4E}' // n  [FULLWIDTH LATIN SMALL LETTER N]
673        => Some("n"),
674        '\u{01CA}' // NJ  [LATIN CAPITAL LETTER NJ]
675        => Some("NJ"),
676        '\u{01CB}' // Nj  [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
677        => Some("Nj"),
678        '\u{24A9}' // ⒩  [PARENTHESIZED LATIN SMALL LETTER N]
679        => Some("(n)"),
680        '\u{01CC}' // nj  [LATIN SMALL LETTER NJ]
681        => Some("nj"),
682        '\u{00D2}' | // Ò  [LATIN CAPITAL LETTER O WITH GRAVE]
683        '\u{00D3}' | // Ó  [LATIN CAPITAL LETTER O WITH ACUTE]
684        '\u{00D4}' | // Ô  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
685        '\u{00D5}' | // Õ  [LATIN CAPITAL LETTER O WITH TILDE]
686        '\u{00D6}' | // Ö  [LATIN CAPITAL LETTER O WITH DIAERESIS]
687        '\u{00D8}' | // Ø  [LATIN CAPITAL LETTER O WITH STROKE]
688        '\u{014C}' | // Ō  [LATIN CAPITAL LETTER O WITH MACRON]
689        '\u{014E}' | // Ŏ  [LATIN CAPITAL LETTER O WITH BREVE]
690        '\u{0150}' | // Ő  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
691        '\u{0186}' | // Ɔ  [LATIN CAPITAL LETTER OPEN O]
692        '\u{019F}' | // Ɵ  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
693        '\u{01A0}' | // Ơ  [LATIN CAPITAL LETTER O WITH HORN]
694        '\u{01D1}' | // Ǒ  [LATIN CAPITAL LETTER O WITH CARON]
695        '\u{01EA}' | // Ǫ  [LATIN CAPITAL LETTER O WITH OGONEK]
696        '\u{01EC}' | // Ǭ  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
697        '\u{01FE}' | // Ǿ  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
698        '\u{020C}' | // Ȍ  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
699        '\u{020E}' | // Ȏ  [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
700        '\u{022A}' | // Ȫ  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
701        '\u{022C}' | // Ȭ  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
702        '\u{022E}' | // Ȯ  [LATIN CAPITAL LETTER O WITH DOT ABOVE]
703        '\u{0230}' | // Ȱ  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
704        '\u{1D0F}' | // ᴏ  [LATIN LETTER SMALL CAPITAL O]
705        '\u{1D10}' | // ᴐ  [LATIN LETTER SMALL CAPITAL OPEN O]
706        '\u{1E4C}' | // Ṍ  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
707        '\u{1E4E}' | // Ṏ  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
708        '\u{1E50}' | // Ṑ  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
709        '\u{1E52}' | // Ṓ  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
710        '\u{1ECC}' | // Ọ  [LATIN CAPITAL LETTER O WITH DOT BELOW]
711        '\u{1ECE}' | // Ỏ  [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
712        '\u{1ED0}' | // Ố  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
713        '\u{1ED2}' | // Ồ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
714        '\u{1ED4}' | // Ổ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
715        '\u{1ED6}' | // Ỗ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
716        '\u{1ED8}' | // Ộ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
717        '\u{1EDA}' | // Ớ  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
718        '\u{1EDC}' | // Ờ  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
719        '\u{1EDE}' | // Ở  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
720        '\u{1EE0}' | // Ỡ  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
721        '\u{1EE2}' | // Ợ  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
722        '\u{24C4}' | // Ⓞ  [CIRCLED LATIN CAPITAL LETTER O]
723        '\u{A74A}' | // Ꝋ  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
724        '\u{A74C}' | // Ꝍ  [LATIN CAPITAL LETTER O WITH LOOP]
725        '\u{FF2F}' // O  [FULLWIDTH LATIN CAPITAL LETTER O]
726        => Some("O"),
727        '\u{00F2}' | // ò  [LATIN SMALL LETTER O WITH GRAVE]
728        '\u{00F3}' | // ó  [LATIN SMALL LETTER O WITH ACUTE]
729        '\u{00F4}' | // ô  [LATIN SMALL LETTER O WITH CIRCUMFLEX]
730        '\u{00F5}' | // õ  [LATIN SMALL LETTER O WITH TILDE]
731        '\u{00F6}' | // ö  [LATIN SMALL LETTER O WITH DIAERESIS]
732        '\u{00F8}' | // ø  [LATIN SMALL LETTER O WITH STROKE]
733        '\u{014D}' | // ō  [LATIN SMALL LETTER O WITH MACRON]
734        '\u{014F}' | // ŏ  [LATIN SMALL LETTER O WITH BREVE]
735        '\u{0151}' | // ő  [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
736        '\u{01A1}' | // ơ  [LATIN SMALL LETTER O WITH HORN]
737        '\u{01D2}' | // ǒ  [LATIN SMALL LETTER O WITH CARON]
738        '\u{01EB}' | // ǫ  [LATIN SMALL LETTER O WITH OGONEK]
739        '\u{01ED}' | // ǭ  [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
740        '\u{01FF}' | // ǿ  [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
741        '\u{020D}' | // ȍ  [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
742        '\u{020F}' | // ȏ  [LATIN SMALL LETTER O WITH INVERTED BREVE]
743        '\u{022B}' | // ȫ  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
744        '\u{022D}' | // ȭ  [LATIN SMALL LETTER O WITH TILDE AND MACRON]
745        '\u{022F}' | // ȯ  [LATIN SMALL LETTER O WITH DOT ABOVE]
746        '\u{0231}' | // ȱ  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
747        '\u{0254}' | // ɔ  [LATIN SMALL LETTER OPEN O]
748        '\u{0275}' | // ɵ  [LATIN SMALL LETTER BARRED O]
749        '\u{1D16}' | // ᴖ  [LATIN SMALL LETTER TOP HALF O]
750        '\u{1D17}' | // ᴗ  [LATIN SMALL LETTER BOTTOM HALF O]
751        '\u{1D97}' | // ᶗ  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
752        '\u{1E4D}' | // ṍ  [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
753        '\u{1E4F}' | // ṏ  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
754        '\u{1E51}' | // ṑ  [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
755        '\u{1E53}' | // ṓ  [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
756        '\u{1ECD}' | // ọ  [LATIN SMALL LETTER O WITH DOT BELOW]
757        '\u{1ECF}' | // ỏ  [LATIN SMALL LETTER O WITH HOOK ABOVE]
758        '\u{1ED1}' | // ố  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
759        '\u{1ED3}' | // ồ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
760        '\u{1ED5}' | // ổ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
761        '\u{1ED7}' | // ỗ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
762        '\u{1ED9}' | // ộ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
763        '\u{1EDB}' | // ớ  [LATIN SMALL LETTER O WITH HORN AND ACUTE]
764        '\u{1EDD}' | // ờ  [LATIN SMALL LETTER O WITH HORN AND GRAVE]
765        '\u{1EDF}' | // ở  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
766        '\u{1EE1}' | // ỡ  [LATIN SMALL LETTER O WITH HORN AND TILDE]
767        '\u{1EE3}' | // ợ  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
768        '\u{2092}' | // ₒ  [LATIN SUBSCRIPT SMALL LETTER O]
769        '\u{24DE}' | // ⓞ  [CIRCLED LATIN SMALL LETTER O]
770        '\u{2C7A}' | // ⱺ  [LATIN SMALL LETTER O WITH LOW RING INSIDE]
771        '\u{A74B}' | // ꝋ  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
772        '\u{A74D}' | // ꝍ  [LATIN SMALL LETTER O WITH LOOP]
773        '\u{FF4F}' // o  [FULLWIDTH LATIN SMALL LETTER O]
774        => Some("o"),
775        '\u{0152}' | // Œ  [LATIN CAPITAL LIGATURE OE]
776        '\u{0276}' // ɶ  [LATIN LETTER SMALL CAPITAL OE]
777        => Some("OE"),
778        '\u{A74E}' // Ꝏ  [LATIN CAPITAL LETTER OO]
779        => Some("OO"),
780        '\u{0222}' | // Ȣ  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU]
781        '\u{1D15}' // ᴕ  [LATIN LETTER SMALL CAPITAL OU]
782        => Some("OU"),
783        '\u{24AA}' // ⒪  [PARENTHESIZED LATIN SMALL LETTER O]
784        => Some("(o)"),
785        '\u{0153}' | // œ  [LATIN SMALL LIGATURE OE]
786        '\u{1D14}' // ᴔ  [LATIN SMALL LETTER TURNED OE]
787        => Some("oe"),
788        '\u{A74F}' // ꝏ  [LATIN SMALL LETTER OO]
789        => Some("oo"),
790        '\u{0223}' // ȣ  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU]
791        => Some("ou"),
792        '\u{01A4}' | // Ƥ  [LATIN CAPITAL LETTER P WITH HOOK]
793        '\u{1D18}' | // ᴘ  [LATIN LETTER SMALL CAPITAL P]
794        '\u{1E54}' | // Ṕ  [LATIN CAPITAL LETTER P WITH ACUTE]
795        '\u{1E56}' | // Ṗ  [LATIN CAPITAL LETTER P WITH DOT ABOVE]
796        '\u{24C5}' | // Ⓟ  [CIRCLED LATIN CAPITAL LETTER P]
797        '\u{2C63}' | // Ᵽ  [LATIN CAPITAL LETTER P WITH STROKE]
798        '\u{A750}' | // Ꝑ  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
799        '\u{A752}' | // Ꝓ  [LATIN CAPITAL LETTER P WITH FLOURISH]
800        '\u{A754}' | // Ꝕ  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
801        '\u{FF30}' // P  [FULLWIDTH LATIN CAPITAL LETTER P]
802        => Some("P"),
803        '\u{01A5}' | // ƥ  [LATIN SMALL LETTER P WITH HOOK]
804        '\u{1D71}' | // ᵱ  [LATIN SMALL LETTER P WITH MIDDLE TILDE]
805        '\u{1D7D}' | // ᵽ  [LATIN SMALL LETTER P WITH STROKE]
806        '\u{1D88}' | // ᶈ  [LATIN SMALL LETTER P WITH PALATAL HOOK]
807        '\u{1E55}' | // ṕ  [LATIN SMALL LETTER P WITH ACUTE]
808        '\u{1E57}' | // ṗ  [LATIN SMALL LETTER P WITH DOT ABOVE]
809        '\u{24DF}' | // ⓟ  [CIRCLED LATIN SMALL LETTER P]
810        '\u{A751}' | // ꝑ  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
811        '\u{A753}' | // ꝓ  [LATIN SMALL LETTER P WITH FLOURISH]
812        '\u{A755}' | // ꝕ  [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
813        '\u{A7FC}' | // ꟼ  [LATIN EPIGRAPHIC LETTER REVERSED P]
814        '\u{FF50}' // p  [FULLWIDTH LATIN SMALL LETTER P]
815        => Some("p"),
816        '\u{24AB}' // ⒫  [PARENTHESIZED LATIN SMALL LETTER P]
817        => Some("(p)"),
818        '\u{024A}' | // Ɋ  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
819        '\u{24C6}' | // Ⓠ  [CIRCLED LATIN CAPITAL LETTER Q]
820        '\u{A756}' | // Ꝗ  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
821        '\u{A758}' | // Ꝙ  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
822        '\u{FF31}' // Q  [FULLWIDTH LATIN CAPITAL LETTER Q]
823        => Some("Q"),
824        '\u{0138}' | // ĸ  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA]
825        '\u{024B}' | // ɋ  [LATIN SMALL LETTER Q WITH HOOK TAIL]
826        '\u{02A0}' | // ʠ  [LATIN SMALL LETTER Q WITH HOOK]
827        '\u{24E0}' | // ⓠ  [CIRCLED LATIN SMALL LETTER Q]
828        '\u{A757}' | // ꝗ  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
829        '\u{A759}' | // ꝙ  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
830        '\u{FF51}' // q  [FULLWIDTH LATIN SMALL LETTER Q]
831        => Some("q"),
832        '\u{24AC}' // ⒬  [PARENTHESIZED LATIN SMALL LETTER Q]
833        => Some("(q)"),
834        '\u{0239}' // ȹ  [LATIN SMALL LETTER QP DIGRAPH]
835        => Some("qp"),
836        '\u{0154}' | // Ŕ  [LATIN CAPITAL LETTER R WITH ACUTE]
837        '\u{0156}' | // Ŗ  [LATIN CAPITAL LETTER R WITH CEDILLA]
838        '\u{0158}' | // Ř  [LATIN CAPITAL LETTER R WITH CARON]
839        '\u{0210}' | // Ȑ  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
840        '\u{0212}' | // Ȓ  [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
841        '\u{024C}' | // Ɍ  [LATIN CAPITAL LETTER R WITH STROKE]
842        '\u{0280}' | // ʀ  [LATIN LETTER SMALL CAPITAL R]
843        '\u{0281}' | // ʁ  [LATIN LETTER SMALL CAPITAL INVERTED R]
844        '\u{1D19}' | // ᴙ  [LATIN LETTER SMALL CAPITAL REVERSED R]
845        '\u{1D1A}' | // ᴚ  [LATIN LETTER SMALL CAPITAL TURNED R]
846        '\u{1E58}' | // Ṙ  [LATIN CAPITAL LETTER R WITH DOT ABOVE]
847        '\u{1E5A}' | // Ṛ  [LATIN CAPITAL LETTER R WITH DOT BELOW]
848        '\u{1E5C}' | // Ṝ  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
849        '\u{1E5E}' | // Ṟ  [LATIN CAPITAL LETTER R WITH LINE BELOW]
850        '\u{24C7}' | // Ⓡ  [CIRCLED LATIN CAPITAL LETTER R]
851        '\u{2C64}' | // Ɽ  [LATIN CAPITAL LETTER R WITH TAIL]
852        '\u{A75A}' | // Ꝛ  [LATIN CAPITAL LETTER R ROTUNDA]
853        '\u{A782}' | // Ꞃ  [LATIN CAPITAL LETTER INSULAR R]
854        '\u{FF32}' // R  [FULLWIDTH LATIN CAPITAL LETTER R]
855        => Some("R"),
856        '\u{0155}' | // ŕ  [LATIN SMALL LETTER R WITH ACUTE]
857        '\u{0157}' | // ŗ  [LATIN SMALL LETTER R WITH CEDILLA]
858        '\u{0159}' | // ř  [LATIN SMALL LETTER R WITH CARON]
859        '\u{0211}' | // ȑ  [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
860        '\u{0213}' | // ȓ  [LATIN SMALL LETTER R WITH INVERTED BREVE]
861        '\u{024D}' | // ɍ  [LATIN SMALL LETTER R WITH STROKE]
862        '\u{027C}' | // ɼ  [LATIN SMALL LETTER R WITH LONG LEG]
863        '\u{027D}' | // ɽ  [LATIN SMALL LETTER R WITH TAIL]
864        '\u{027E}' | // ɾ  [LATIN SMALL LETTER R WITH FISHHOOK]
865        '\u{027F}' | // ɿ  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
866        '\u{1D63}' | // ᵣ  [LATIN SUBSCRIPT SMALL LETTER R]
867        '\u{1D72}' | // ᵲ  [LATIN SMALL LETTER R WITH MIDDLE TILDE]
868        '\u{1D73}' | // ᵳ  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
869        '\u{1D89}' | // ᶉ  [LATIN SMALL LETTER R WITH PALATAL HOOK]
870        '\u{1E59}' | // ṙ  [LATIN SMALL LETTER R WITH DOT ABOVE]
871        '\u{1E5B}' | // ṛ  [LATIN SMALL LETTER R WITH DOT BELOW]
872        '\u{1E5D}' | // ṝ  [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
873        '\u{1E5F}' | // ṟ  [LATIN SMALL LETTER R WITH LINE BELOW]
874        '\u{24E1}' | // ⓡ  [CIRCLED LATIN SMALL LETTER R]
875        '\u{A75B}' | // ꝛ  [LATIN SMALL LETTER R ROTUNDA]
876        '\u{A783}' | // ꞃ  [LATIN SMALL LETTER INSULAR R]
877        '\u{FF52}' // r  [FULLWIDTH LATIN SMALL LETTER R]
878        => Some("r"),
879        '\u{24AD}' // ⒭  [PARENTHESIZED LATIN SMALL LETTER R]
880        => Some("(r)"),
881        '\u{015A}' | // Ś  [LATIN CAPITAL LETTER S WITH ACUTE]
882        '\u{015C}' | // Ŝ  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
883        '\u{015E}' | // Ş  [LATIN CAPITAL LETTER S WITH CEDILLA]
884        '\u{0160}' | // Š  [LATIN CAPITAL LETTER S WITH CARON]
885        '\u{0218}' | // Ș  [LATIN CAPITAL LETTER S WITH COMMA BELOW]
886        '\u{1E60}' | // Ṡ  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
887        '\u{1E62}' | // Ṣ  [LATIN CAPITAL LETTER S WITH DOT BELOW]
888        '\u{1E64}' | // Ṥ  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
889        '\u{1E66}' | // Ṧ  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
890        '\u{1E68}' | // Ṩ  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
891        '\u{24C8}' | // Ⓢ  [CIRCLED LATIN CAPITAL LETTER S]
892        '\u{A731}' | // ꜱ  [LATIN LETTER SMALL CAPITAL S]
893        '\u{A785}' | // ꞅ  [LATIN SMALL LETTER INSULAR S]
894        '\u{FF33}' // S  [FULLWIDTH LATIN CAPITAL LETTER S]
895        => Some("S"),
896        '\u{015B}' | // ś  [LATIN SMALL LETTER S WITH ACUTE]
897        '\u{015D}' | // ŝ  [LATIN SMALL LETTER S WITH CIRCUMFLEX]
898        '\u{015F}' | // ş  [LATIN SMALL LETTER S WITH CEDILLA]
899        '\u{0161}' | // š  [LATIN SMALL LETTER S WITH CARON]
900        '\u{017F}' | // ſ  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S]
901        '\u{0219}' | // ș  [LATIN SMALL LETTER S WITH COMMA BELOW]
902        '\u{023F}' | // ȿ  [LATIN SMALL LETTER S WITH SWASH TAIL]
903        '\u{0282}' | // ʂ  [LATIN SMALL LETTER S WITH HOOK]
904        '\u{1D74}' | // ᵴ  [LATIN SMALL LETTER S WITH MIDDLE TILDE]
905        '\u{1D8A}' | // ᶊ  [LATIN SMALL LETTER S WITH PALATAL HOOK]
906        '\u{1E61}' | // ṡ  [LATIN SMALL LETTER S WITH DOT ABOVE]
907        '\u{1E63}' | // ṣ  [LATIN SMALL LETTER S WITH DOT BELOW]
908        '\u{1E65}' | // ṥ  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
909        '\u{1E67}' | // ṧ  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
910        '\u{1E69}' | // ṩ  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
911        '\u{1E9C}' | // ẜ  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
912        '\u{1E9D}' | // ẝ  [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
913        '\u{24E2}' | // ⓢ  [CIRCLED LATIN SMALL LETTER S]
914        '\u{A784}' | // Ꞅ  [LATIN CAPITAL LETTER INSULAR S]
915        '\u{FF53}' // s  [FULLWIDTH LATIN SMALL LETTER S]
916        => Some("s"),
917        '\u{1E9E}' // ẞ  [LATIN CAPITAL LETTER SHARP S]
918        => Some("SS"),
919        '\u{24AE}' // ⒮  [PARENTHESIZED LATIN SMALL LETTER S]
920        => Some("(s)"),
921        '\u{00DF}' // ß  [LATIN SMALL LETTER SHARP S]
922        => Some("ss"),
923        '\u{FB06}' // st  [LATIN SMALL LIGATURE ST]
924        => Some("st"),
925        '\u{0162}' | // Ţ  [LATIN CAPITAL LETTER T WITH CEDILLA]
926        '\u{0164}' | // Ť  [LATIN CAPITAL LETTER T WITH CARON]
927        '\u{0166}' | // Ŧ  [LATIN CAPITAL LETTER T WITH STROKE]
928        '\u{01AC}' | // Ƭ  [LATIN CAPITAL LETTER T WITH HOOK]
929        '\u{01AE}' | // Ʈ  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
930        '\u{021A}' | // Ț  [LATIN CAPITAL LETTER T WITH COMMA BELOW]
931        '\u{023E}' | // Ⱦ  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
932        '\u{1D1B}' | // ᴛ  [LATIN LETTER SMALL CAPITAL T]
933        '\u{1E6A}' | // Ṫ  [LATIN CAPITAL LETTER T WITH DOT ABOVE]
934        '\u{1E6C}' | // Ṭ  [LATIN CAPITAL LETTER T WITH DOT BELOW]
935        '\u{1E6E}' | // Ṯ  [LATIN CAPITAL LETTER T WITH LINE BELOW]
936        '\u{1E70}' | // Ṱ  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
937        '\u{24C9}' | // Ⓣ  [CIRCLED LATIN CAPITAL LETTER T]
938        '\u{A786}' | // Ꞇ  [LATIN CAPITAL LETTER INSULAR T]
939        '\u{FF34}' // T  [FULLWIDTH LATIN CAPITAL LETTER T]
940        => Some("T"),
941        '\u{0163}' | // ţ  [LATIN SMALL LETTER T WITH CEDILLA]
942        '\u{0165}' | // ť  [LATIN SMALL LETTER T WITH CARON]
943        '\u{0167}' | // ŧ  [LATIN SMALL LETTER T WITH STROKE]
944        '\u{01AB}' | // ƫ  [LATIN SMALL LETTER T WITH PALATAL HOOK]
945        '\u{01AD}' | // ƭ  [LATIN SMALL LETTER T WITH HOOK]
946        '\u{021B}' | // ț  [LATIN SMALL LETTER T WITH COMMA BELOW]
947        '\u{0236}' | // ȶ  [LATIN SMALL LETTER T WITH CURL]
948        '\u{0287}' | // ʇ  [LATIN SMALL LETTER TURNED T]
949        '\u{0288}' | // ʈ  [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
950        '\u{1D75}' | // ᵵ  [LATIN SMALL LETTER T WITH MIDDLE TILDE]
951        '\u{1E6B}' | // ṫ  [LATIN SMALL LETTER T WITH DOT ABOVE]
952        '\u{1E6D}' | // ṭ  [LATIN SMALL LETTER T WITH DOT BELOW]
953        '\u{1E6F}' | // ṯ  [LATIN SMALL LETTER T WITH LINE BELOW]
954        '\u{1E71}' | // ṱ  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
955        '\u{1E97}' | // ẗ  [LATIN SMALL LETTER T WITH DIAERESIS]
956        '\u{24E3}' | // ⓣ  [CIRCLED LATIN SMALL LETTER T]
957        '\u{2C66}' | // ⱦ  [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
958        '\u{FF54}' // t  [FULLWIDTH LATIN SMALL LETTER T]
959        => Some("t"),
960        '\u{00DE}' | // Þ  [LATIN CAPITAL LETTER THORN]
961        '\u{A766}' // Ꝧ  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
962        => Some("TH"),
963        '\u{A728}' // Ꜩ  [LATIN CAPITAL LETTER TZ]
964        => Some("TZ"),
965        '\u{24AF}' // ⒯  [PARENTHESIZED LATIN SMALL LETTER T]
966        => Some("(t)"),
967        '\u{02A8}' // ʨ  [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
968        => Some("tc"),
969        '\u{00FE}' | // þ  [LATIN SMALL LETTER THORN]
970        '\u{1D7A}' | // ᵺ  [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
971        '\u{A767}' // ꝧ  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
972        => Some("th"),
973        '\u{02A6}' // ʦ  [LATIN SMALL LETTER TS DIGRAPH]
974        => Some("ts"),
975        '\u{A729}' // ꜩ  [LATIN SMALL LETTER TZ]
976        => Some("tz"),
977        '\u{00D9}' | // Ù  [LATIN CAPITAL LETTER U WITH GRAVE]
978        '\u{00DA}' | // Ú  [LATIN CAPITAL LETTER U WITH ACUTE]
979        '\u{00DB}' | // Û  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
980        '\u{00DC}' | // Ü  [LATIN CAPITAL LETTER U WITH DIAERESIS]
981        '\u{0168}' | // Ũ  [LATIN CAPITAL LETTER U WITH TILDE]
982        '\u{016A}' | // Ū  [LATIN CAPITAL LETTER U WITH MACRON]
983        '\u{016C}' | // Ŭ  [LATIN CAPITAL LETTER U WITH BREVE]
984        '\u{016E}' | // Ů  [LATIN CAPITAL LETTER U WITH RING ABOVE]
985        '\u{0170}' | // Ű  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
986        '\u{0172}' | // Ų  [LATIN CAPITAL LETTER U WITH OGONEK]
987        '\u{01AF}' | // Ư  [LATIN CAPITAL LETTER U WITH HORN]
988        '\u{01D3}' | // Ǔ  [LATIN CAPITAL LETTER U WITH CARON]
989        '\u{01D5}' | // Ǖ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
990        '\u{01D7}' | // Ǘ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
991        '\u{01D9}' | // Ǚ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
992        '\u{01DB}' | // Ǜ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
993        '\u{0214}' | // Ȕ  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
994        '\u{0216}' | // Ȗ  [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
995        '\u{0244}' | // Ʉ  [LATIN CAPITAL LETTER U BAR]
996        '\u{1D1C}' | // ᴜ  [LATIN LETTER SMALL CAPITAL U]
997        '\u{1D7E}' | // ᵾ  [LATIN SMALL CAPITAL LETTER U WITH STROKE]
998        '\u{1E72}' | // Ṳ  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
999        '\u{1E74}' | // Ṵ  [LATIN CAPITAL LETTER U WITH TILDE BELOW]
1000        '\u{1E76}' | // Ṷ  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
1001        '\u{1E78}' | // Ṹ  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
1002        '\u{1E7A}' | // Ṻ  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
1003        '\u{1EE4}' | // Ụ  [LATIN CAPITAL LETTER U WITH DOT BELOW]
1004        '\u{1EE6}' | // Ủ  [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
1005        '\u{1EE8}' | // Ứ  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
1006        '\u{1EEA}' | // Ừ  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
1007        '\u{1EEC}' | // Ử  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
1008        '\u{1EEE}' | // Ữ  [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
1009        '\u{1EF0}' | // Ự  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
1010        '\u{24CA}' | // Ⓤ  [CIRCLED LATIN CAPITAL LETTER U]
1011        '\u{FF35}' // U  [FULLWIDTH LATIN CAPITAL LETTER U]
1012        => Some("U"),
1013        '\u{00F9}' | // ù  [LATIN SMALL LETTER U WITH GRAVE]
1014        '\u{00FA}' | // ú  [LATIN SMALL LETTER U WITH ACUTE]
1015        '\u{00FB}' | // û  [LATIN SMALL LETTER U WITH CIRCUMFLEX]
1016        '\u{00FC}' | // ü  [LATIN SMALL LETTER U WITH DIAERESIS]
1017        '\u{0169}' | // ũ  [LATIN SMALL LETTER U WITH TILDE]
1018        '\u{016B}' | // ū  [LATIN SMALL LETTER U WITH MACRON]
1019        '\u{016D}' | // ŭ  [LATIN SMALL LETTER U WITH BREVE]
1020        '\u{016F}' | // ů  [LATIN SMALL LETTER U WITH RING ABOVE]
1021        '\u{0171}' | // ű  [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
1022        '\u{0173}' | // ų  [LATIN SMALL LETTER U WITH OGONEK]
1023        '\u{01B0}' | // ư  [LATIN SMALL LETTER U WITH HORN]
1024        '\u{01D4}' | // ǔ  [LATIN SMALL LETTER U WITH CARON]
1025        '\u{01D6}' | // ǖ  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
1026        '\u{01D8}' | // ǘ  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
1027        '\u{01DA}' | // ǚ  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
1028        '\u{01DC}' | // ǜ  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
1029        '\u{0215}' | // ȕ  [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
1030        '\u{0217}' | // ȗ  [LATIN SMALL LETTER U WITH INVERTED BREVE]
1031        '\u{0289}' | // ʉ  [LATIN SMALL LETTER U BAR]
1032        '\u{1D64}' | // ᵤ  [LATIN SUBSCRIPT SMALL LETTER U]
1033        '\u{1D99}' | // ᶙ  [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
1034        '\u{1E73}' | // ṳ  [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
1035        '\u{1E75}' | // ṵ  [LATIN SMALL LETTER U WITH TILDE BELOW]
1036        '\u{1E77}' | // ṷ  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
1037        '\u{1E79}' | // ṹ  [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
1038        '\u{1E7B}' | // ṻ  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
1039        '\u{1EE5}' | // ụ  [LATIN SMALL LETTER U WITH DOT BELOW]
1040        '\u{1EE7}' | // ủ  [LATIN SMALL LETTER U WITH HOOK ABOVE]
1041        '\u{1EE9}' | // ứ  [LATIN SMALL LETTER U WITH HORN AND ACUTE]
1042        '\u{1EEB}' | // ừ  [LATIN SMALL LETTER U WITH HORN AND GRAVE]
1043        '\u{1EED}' | // ử  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
1044        '\u{1EEF}' | // ữ  [LATIN SMALL LETTER U WITH HORN AND TILDE]
1045        '\u{1EF1}' | // ự  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
1046        '\u{24E4}' | // ⓤ  [CIRCLED LATIN SMALL LETTER U]
1047        '\u{FF55}' // u  [FULLWIDTH LATIN SMALL LETTER U]
1048        => Some("u"),
1049        '\u{24B0}' // ⒰  [PARENTHESIZED LATIN SMALL LETTER U]
1050        => Some("(u)"),
1051        '\u{1D6B}' // ᵫ  [LATIN SMALL LETTER UE]
1052        => Some("ue"),
1053        '\u{01B2}' | // Ʋ  [LATIN CAPITAL LETTER V WITH HOOK]
1054        '\u{0245}' | // Ʌ  [LATIN CAPITAL LETTER TURNED V]
1055        '\u{1D20}' | // ᴠ  [LATIN LETTER SMALL CAPITAL V]
1056        '\u{1E7C}' | // Ṽ  [LATIN CAPITAL LETTER V WITH TILDE]
1057        '\u{1E7E}' | // Ṿ  [LATIN CAPITAL LETTER V WITH DOT BELOW]
1058        '\u{1EFC}' | // Ỽ  [LATIN CAPITAL LETTER MIDDLE-WELSH V]
1059        '\u{24CB}' | // Ⓥ  [CIRCLED LATIN CAPITAL LETTER V]
1060        '\u{A75E}' | // Ꝟ  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
1061        '\u{A768}' | // Ꝩ  [LATIN CAPITAL LETTER VEND]
1062        '\u{FF36}' // V  [FULLWIDTH LATIN CAPITAL LETTER V]
1063        => Some("V"),
1064        '\u{028B}' | // ʋ  [LATIN SMALL LETTER V WITH HOOK]
1065        '\u{028C}' | // ʌ  [LATIN SMALL LETTER TURNED V]
1066        '\u{1D65}' | // ᵥ  [LATIN SUBSCRIPT SMALL LETTER V]
1067        '\u{1D8C}' | // ᶌ  [LATIN SMALL LETTER V WITH PALATAL HOOK]
1068        '\u{1E7D}' | // ṽ  [LATIN SMALL LETTER V WITH TILDE]
1069        '\u{1E7F}' | // ṿ  [LATIN SMALL LETTER V WITH DOT BELOW]
1070        '\u{24E5}' | // ⓥ  [CIRCLED LATIN SMALL LETTER V]
1071        '\u{2C71}' | // ⱱ  [LATIN SMALL LETTER V WITH RIGHT HOOK]
1072        '\u{2C74}' | // ⱴ  [LATIN SMALL LETTER V WITH CURL]
1073        '\u{A75F}' | // ꝟ  [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
1074        '\u{FF56}' // v  [FULLWIDTH LATIN SMALL LETTER V]
1075        => Some("v"),
1076        '\u{A760}' // Ꝡ  [LATIN CAPITAL LETTER VY]
1077        => Some("VY"),
1078        '\u{24B1}' // ⒱  [PARENTHESIZED LATIN SMALL LETTER V]
1079        => Some("(v)"),
1080        '\u{A761}' // ꝡ  [LATIN SMALL LETTER VY]
1081        => Some("vy"),
1082        '\u{0174}' | // Ŵ  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
1083        '\u{01F7}' | // Ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN]
1084        '\u{1D21}' | // ᴡ  [LATIN LETTER SMALL CAPITAL W]
1085        '\u{1E80}' | // Ẁ  [LATIN CAPITAL LETTER W WITH GRAVE]
1086        '\u{1E82}' | // Ẃ  [LATIN CAPITAL LETTER W WITH ACUTE]
1087        '\u{1E84}' | // Ẅ  [LATIN CAPITAL LETTER W WITH DIAERESIS]
1088        '\u{1E86}' | // Ẇ  [LATIN CAPITAL LETTER W WITH DOT ABOVE]
1089        '\u{1E88}' | // Ẉ  [LATIN CAPITAL LETTER W WITH DOT BELOW]
1090        '\u{24CC}' | // Ⓦ  [CIRCLED LATIN CAPITAL LETTER W]
1091        '\u{2C72}' | // Ⱳ  [LATIN CAPITAL LETTER W WITH HOOK]
1092        '\u{FF37}' // W  [FULLWIDTH LATIN CAPITAL LETTER W]
1093        => Some("W"),
1094        '\u{0175}' | // ŵ  [LATIN SMALL LETTER W WITH CIRCUMFLEX]
1095        '\u{01BF}' | // ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN]
1096        '\u{028D}' | // ʍ  [LATIN SMALL LETTER TURNED W]
1097        '\u{1E81}' | // ẁ  [LATIN SMALL LETTER W WITH GRAVE]
1098        '\u{1E83}' | // ẃ  [LATIN SMALL LETTER W WITH ACUTE]
1099        '\u{1E85}' | // ẅ  [LATIN SMALL LETTER W WITH DIAERESIS]
1100        '\u{1E87}' | // ẇ  [LATIN SMALL LETTER W WITH DOT ABOVE]
1101        '\u{1E89}' | // ẉ  [LATIN SMALL LETTER W WITH DOT BELOW]
1102        '\u{1E98}' | // ẘ  [LATIN SMALL LETTER W WITH RING ABOVE]
1103        '\u{24E6}' | // ⓦ  [CIRCLED LATIN SMALL LETTER W]
1104        '\u{2C73}' | // ⱳ  [LATIN SMALL LETTER W WITH HOOK]
1105        '\u{FF57}' // w  [FULLWIDTH LATIN SMALL LETTER W]
1106        => Some("w"),
1107        '\u{24B2}' // ⒲  [PARENTHESIZED LATIN SMALL LETTER W]
1108        => Some("(w)"),
1109        '\u{1E8A}' | // Ẋ  [LATIN CAPITAL LETTER X WITH DOT ABOVE]
1110        '\u{1E8C}' | // Ẍ  [LATIN CAPITAL LETTER X WITH DIAERESIS]
1111        '\u{24CD}' | // Ⓧ  [CIRCLED LATIN CAPITAL LETTER X]
1112        '\u{FF38}' // X  [FULLWIDTH LATIN CAPITAL LETTER X]
1113        => Some("X"),
1114        '\u{1D8D}' | // ᶍ  [LATIN SMALL LETTER X WITH PALATAL HOOK]
1115        '\u{1E8B}' | // ẋ  [LATIN SMALL LETTER X WITH DOT ABOVE]
1116        '\u{1E8D}' | // ẍ  [LATIN SMALL LETTER X WITH DIAERESIS]
1117        '\u{2093}' | // ₓ  [LATIN SUBSCRIPT SMALL LETTER X]
1118        '\u{24E7}' | // ⓧ  [CIRCLED LATIN SMALL LETTER X]
1119        '\u{FF58}' // x  [FULLWIDTH LATIN SMALL LETTER X]
1120        => Some("x"),
1121        '\u{24B3}' // ⒳  [PARENTHESIZED LATIN SMALL LETTER X]
1122        => Some("(x)"),
1123        '\u{00DD}' | // Ý  [LATIN CAPITAL LETTER Y WITH ACUTE]
1124        '\u{0176}' | // Ŷ  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
1125        '\u{0178}' | // Ÿ  [LATIN CAPITAL LETTER Y WITH DIAERESIS]
1126        '\u{01B3}' | // Ƴ  [LATIN CAPITAL LETTER Y WITH HOOK]
1127        '\u{0232}' | // Ȳ  [LATIN CAPITAL LETTER Y WITH MACRON]
1128        '\u{024E}' | // Ɏ  [LATIN CAPITAL LETTER Y WITH STROKE]
1129        '\u{028F}' | // ʏ  [LATIN LETTER SMALL CAPITAL Y]
1130        '\u{1E8E}' | // Ẏ  [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
1131        '\u{1EF2}' | // Ỳ  [LATIN CAPITAL LETTER Y WITH GRAVE]
1132        '\u{1EF4}' | // Ỵ  [LATIN CAPITAL LETTER Y WITH DOT BELOW]
1133        '\u{1EF6}' | // Ỷ  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
1134        '\u{1EF8}' | // Ỹ  [LATIN CAPITAL LETTER Y WITH TILDE]
1135        '\u{1EFE}' | // Ỿ  [LATIN CAPITAL LETTER Y WITH LOOP]
1136        '\u{24CE}' | // Ⓨ  [CIRCLED LATIN CAPITAL LETTER Y]
1137        '\u{FF39}' // Y  [FULLWIDTH LATIN CAPITAL LETTER Y]
1138        => Some("Y"),
1139        '\u{00FD}' | // ý  [LATIN SMALL LETTER Y WITH ACUTE]
1140        '\u{00FF}' | // ÿ  [LATIN SMALL LETTER Y WITH DIAERESIS]
1141        '\u{0177}' | // ŷ  [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
1142        '\u{01B4}' | // ƴ  [LATIN SMALL LETTER Y WITH HOOK]
1143        '\u{0233}' | // ȳ  [LATIN SMALL LETTER Y WITH MACRON]
1144        '\u{024F}' | // ɏ  [LATIN SMALL LETTER Y WITH STROKE]
1145        '\u{028E}' | // ʎ  [LATIN SMALL LETTER TURNED Y]
1146        '\u{1E8F}' | // ẏ  [LATIN SMALL LETTER Y WITH DOT ABOVE]
1147        '\u{1E99}' | // ẙ  [LATIN SMALL LETTER Y WITH RING ABOVE]
1148        '\u{1EF3}' | // ỳ  [LATIN SMALL LETTER Y WITH GRAVE]
1149        '\u{1EF5}' | // ỵ  [LATIN SMALL LETTER Y WITH DOT BELOW]
1150        '\u{1EF7}' | // ỷ  [LATIN SMALL LETTER Y WITH HOOK ABOVE]
1151        '\u{1EF9}' | // ỹ  [LATIN SMALL LETTER Y WITH TILDE]
1152        '\u{1EFF}' | // ỿ  [LATIN SMALL LETTER Y WITH LOOP]
1153        '\u{24E8}' | // ⓨ  [CIRCLED LATIN SMALL LETTER Y]
1154        '\u{FF59}' // y  [FULLWIDTH LATIN SMALL LETTER Y]
1155        => Some("y"),
1156        '\u{24B4}' // ⒴  [PARENTHESIZED LATIN SMALL LETTER Y]
1157        => Some("(y)"),
1158        '\u{0179}' | // Ź  [LATIN CAPITAL LETTER Z WITH ACUTE]
1159        '\u{017B}' | // Ż  [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
1160        '\u{017D}' | // Ž  [LATIN CAPITAL LETTER Z WITH CARON]
1161        '\u{01B5}' | // Ƶ  [LATIN CAPITAL LETTER Z WITH STROKE]
1162        '\u{021C}' | // Ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH]
1163        '\u{0224}' | // Ȥ  [LATIN CAPITAL LETTER Z WITH HOOK]
1164        '\u{1D22}' | // ᴢ  [LATIN LETTER SMALL CAPITAL Z]
1165        '\u{1E90}' | // Ẑ  [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
1166        '\u{1E92}' | // Ẓ  [LATIN CAPITAL LETTER Z WITH DOT BELOW]
1167        '\u{1E94}' | // Ẕ  [LATIN CAPITAL LETTER Z WITH LINE BELOW]
1168        '\u{24CF}' | // Ⓩ  [CIRCLED LATIN CAPITAL LETTER Z]
1169        '\u{2C6B}' | // Ⱬ  [LATIN CAPITAL LETTER Z WITH DESCENDER]
1170        '\u{A762}' | // Ꝣ  [LATIN CAPITAL LETTER VISIGOTHIC Z]
1171        '\u{FF3A}' // Z  [FULLWIDTH LATIN CAPITAL LETTER Z]
1172        => Some("Z"),
1173        '\u{017A}' | // ź  [LATIN SMALL LETTER Z WITH ACUTE]
1174        '\u{017C}' | // ż  [LATIN SMALL LETTER Z WITH DOT ABOVE]
1175        '\u{017E}' | // ž  [LATIN SMALL LETTER Z WITH CARON]
1176        '\u{01B6}' | // ƶ  [LATIN SMALL LETTER Z WITH STROKE]
1177        '\u{021D}' | // ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN SMALL LETTER YOGH]
1178        '\u{0225}' | // ȥ  [LATIN SMALL LETTER Z WITH HOOK]
1179        '\u{0240}' | // ɀ  [LATIN SMALL LETTER Z WITH SWASH TAIL]
1180        '\u{0290}' | // ʐ  [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
1181        '\u{0291}' | // ʑ  [LATIN SMALL LETTER Z WITH CURL]
1182        '\u{1D76}' | // ᵶ  [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
1183        '\u{1D8E}' | // ᶎ  [LATIN SMALL LETTER Z WITH PALATAL HOOK]
1184        '\u{1E91}' | // ẑ  [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
1185        '\u{1E93}' | // ẓ  [LATIN SMALL LETTER Z WITH DOT BELOW]
1186        '\u{1E95}' | // ẕ  [LATIN SMALL LETTER Z WITH LINE BELOW]
1187        '\u{24E9}' | // ⓩ  [CIRCLED LATIN SMALL LETTER Z]
1188        '\u{2C6C}' | // ⱬ  [LATIN SMALL LETTER Z WITH DESCENDER]
1189        '\u{A763}' | // ꝣ  [LATIN SMALL LETTER VISIGOTHIC Z]
1190        '\u{FF5A}' // z  [FULLWIDTH LATIN SMALL LETTER Z]
1191        => Some("z"),
1192        '\u{24B5}' // ⒵  [PARENTHESIZED LATIN SMALL LETTER Z]
1193        => Some("(z)"),
1194        '\u{2070}' | // ⁰  [SUPERSCRIPT ZERO]
1195        '\u{2080}' | // ₀  [SUBSCRIPT ZERO]
1196        '\u{24EA}' | // ⓪  [CIRCLED DIGIT ZERO]
1197        '\u{24FF}' | // ⓿  [NEGATIVE CIRCLED DIGIT ZERO]
1198        '\u{FF10}' // 0  [FULLWIDTH DIGIT ZERO]
1199        => Some("0"),
1200        '\u{00B9}' | // ¹  [SUPERSCRIPT ONE]
1201        '\u{2081}' | // ₁  [SUBSCRIPT ONE]
1202        '\u{2460}' | // ①  [CIRCLED DIGIT ONE]
1203        '\u{24F5}' | // ⓵  [DOUBLE CIRCLED DIGIT ONE]
1204        '\u{2776}' | // ❶  [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
1205        '\u{2780}' | // ➀  [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
1206        '\u{278A}' | // ➊  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
1207        '\u{FF11}' // 1  [FULLWIDTH DIGIT ONE]
1208        => Some("1"),
1209        '\u{2488}' // ⒈  [DIGIT ONE FULL STOP]
1210        => Some("1."),
1211        '\u{2474}' // ⑴  [PARENTHESIZED DIGIT ONE]
1212        => Some("(1)"),
1213        '\u{00B2}' | // ²  [SUPERSCRIPT TWO]
1214        '\u{2082}' | // ₂  [SUBSCRIPT TWO]
1215        '\u{2461}' | // ②  [CIRCLED DIGIT TWO]
1216        '\u{24F6}' | // ⓶  [DOUBLE CIRCLED DIGIT TWO]
1217        '\u{2777}' | // ❷  [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
1218        '\u{2781}' | // ➁  [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
1219        '\u{278B}' | // ➋  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
1220        '\u{FF12}' // 2  [FULLWIDTH DIGIT TWO]
1221        => Some("2"),
1222        '\u{2489}' // ⒉  [DIGIT TWO FULL STOP]
1223        => Some("2."),
1224        '\u{2475}' // ⑵  [PARENTHESIZED DIGIT TWO]
1225        => Some("(2)"),
1226        '\u{00B3}' | // ³  [SUPERSCRIPT THREE]
1227        '\u{2083}' | // ₃  [SUBSCRIPT THREE]
1228        '\u{2462}' | // ③  [CIRCLED DIGIT THREE]
1229        '\u{24F7}' | // ⓷  [DOUBLE CIRCLED DIGIT THREE]
1230        '\u{2778}' | // ❸  [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
1231        '\u{2782}' | // ➂  [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
1232        '\u{278C}' | // ➌  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
1233        '\u{FF13}' // 3  [FULLWIDTH DIGIT THREE]
1234        => Some("3"),
1235        '\u{248A}' // ⒊  [DIGIT THREE FULL STOP]
1236        => Some("3."),
1237        '\u{2476}' // ⑶  [PARENTHESIZED DIGIT THREE]
1238        => Some("(3)"),
1239        '\u{2074}' | // ⁴  [SUPERSCRIPT FOUR]
1240        '\u{2084}' | // ₄  [SUBSCRIPT FOUR]
1241        '\u{2463}' | // ④  [CIRCLED DIGIT FOUR]
1242        '\u{24F8}' | // ⓸  [DOUBLE CIRCLED DIGIT FOUR]
1243        '\u{2779}' | // ❹  [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
1244        '\u{2783}' | // ➃  [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
1245        '\u{278D}' | // ➍  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
1246        '\u{FF14}' // 4  [FULLWIDTH DIGIT FOUR]
1247        => Some("4"),
1248        '\u{248B}' // ⒋  [DIGIT FOUR FULL STOP]
1249        => Some("4."),
1250        '\u{2477}' // ⑷  [PARENTHESIZED DIGIT FOUR]
1251        => Some("(4)"),
1252        '\u{2075}' | // ⁵  [SUPERSCRIPT FIVE]
1253        '\u{2085}' | // ₅  [SUBSCRIPT FIVE]
1254        '\u{2464}' | // ⑤  [CIRCLED DIGIT FIVE]
1255        '\u{24F9}' | // ⓹  [DOUBLE CIRCLED DIGIT FIVE]
1256        '\u{277A}' | // ❺  [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
1257        '\u{2784}' | // ➄  [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
1258        '\u{278E}' | // ➎  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
1259        '\u{FF15}' // 5  [FULLWIDTH DIGIT FIVE]
1260        => Some("5"),
1261        '\u{248C}' // ⒌  [DIGIT FIVE FULL STOP]
1262        => Some("5."),
1263        '\u{2478}' // ⑸  [PARENTHESIZED DIGIT FIVE]
1264        => Some("(5)"),
1265        '\u{2076}' | // ⁶  [SUPERSCRIPT SIX]
1266        '\u{2086}' | // ₆  [SUBSCRIPT SIX]
1267        '\u{2465}' | // ⑥  [CIRCLED DIGIT SIX]
1268        '\u{24FA}' | // ⓺  [DOUBLE CIRCLED DIGIT SIX]
1269        '\u{277B}' | // ❻  [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
1270        '\u{2785}' | // ➅  [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
1271        '\u{278F}' | // ➏  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
1272        '\u{FF16}' // 6  [FULLWIDTH DIGIT SIX]
1273        => Some("6"),
1274        '\u{248D}' // ⒍  [DIGIT SIX FULL STOP]
1275        => Some("6."),
1276        '\u{2479}' // ⑹  [PARENTHESIZED DIGIT SIX]
1277        => Some("(6)"),
1278        '\u{2077}' | // ⁷  [SUPERSCRIPT SEVEN]
1279        '\u{2087}' | // ₇  [SUBSCRIPT SEVEN]
1280        '\u{2466}' | // ⑦  [CIRCLED DIGIT SEVEN]
1281        '\u{24FB}' | // ⓻  [DOUBLE CIRCLED DIGIT SEVEN]
1282        '\u{277C}' | // ❼  [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
1283        '\u{2786}' | // ➆  [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
1284        '\u{2790}' | // ➐  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
1285        '\u{FF17}' // 7  [FULLWIDTH DIGIT SEVEN]
1286        => Some("7"),
1287        '\u{248E}' // ⒎  [DIGIT SEVEN FULL STOP]
1288        => Some("7."),
1289        '\u{247A}' // ⑺  [PARENTHESIZED DIGIT SEVEN]
1290        => Some("(7)"),
1291        '\u{2078}' | // ⁸  [SUPERSCRIPT EIGHT]
1292        '\u{2088}' | // ₈  [SUBSCRIPT EIGHT]
1293        '\u{2467}' | // ⑧  [CIRCLED DIGIT EIGHT]
1294        '\u{24FC}' | // ⓼  [DOUBLE CIRCLED DIGIT EIGHT]
1295        '\u{277D}' | // ❽  [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
1296        '\u{2787}' | // ➇  [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
1297        '\u{2791}' | // ➑  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
1298        '\u{FF18}' // 8  [FULLWIDTH DIGIT EIGHT]
1299        => Some("8"),
1300        '\u{248F}' // ⒏  [DIGIT EIGHT FULL STOP]
1301        => Some("8."),
1302        '\u{247B}' // ⑻  [PARENTHESIZED DIGIT EIGHT]
1303        => Some("(8)"),
1304        '\u{2079}' | // ⁹  [SUPERSCRIPT NINE]
1305        '\u{2089}' | // ₉  [SUBSCRIPT NINE]
1306        '\u{2468}' | // ⑨  [CIRCLED DIGIT NINE]
1307        '\u{24FD}' | // ⓽  [DOUBLE CIRCLED DIGIT NINE]
1308        '\u{277E}' | // ❾  [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
1309        '\u{2788}' | // ➈  [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
1310        '\u{2792}' | // ➒  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
1311        '\u{FF19}' // 9  [FULLWIDTH DIGIT NINE]
1312        => Some("9"),
1313        '\u{2490}' // ⒐  [DIGIT NINE FULL STOP]
1314        => Some("9."),
1315        '\u{247C}' // ⑼  [PARENTHESIZED DIGIT NINE]
1316        => Some("(9)"),
1317        '\u{2469}' | // ⑩  [CIRCLED NUMBER TEN]
1318        '\u{24FE}' | // ⓾  [DOUBLE CIRCLED NUMBER TEN]
1319        '\u{277F}' | // ❿  [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
1320        '\u{2789}' | // ➉  [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
1321        '\u{2793}' // ➓  [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
1322        => Some("10"),
1323        '\u{2491}' // ⒑  [NUMBER TEN FULL STOP]
1324        => Some("10."),
1325        '\u{247D}' // ⑽  [PARENTHESIZED NUMBER TEN]
1326        => Some("(10)"),
1327        '\u{246A}' | // ⑪  [CIRCLED NUMBER ELEVEN]
1328        '\u{24EB}' // ⓫  [NEGATIVE CIRCLED NUMBER ELEVEN]
1329        => Some("11"),
1330        '\u{2492}' // ⒒  [NUMBER ELEVEN FULL STOP]
1331        => Some("11."),
1332        '\u{247E}' // ⑾  [PARENTHESIZED NUMBER ELEVEN]
1333        => Some("(11)"),
1334        '\u{246B}' | // ⑫  [CIRCLED NUMBER TWELVE]
1335        '\u{24EC}' // ⓬  [NEGATIVE CIRCLED NUMBER TWELVE]
1336        => Some("12"),
1337        '\u{2493}' // ⒓  [NUMBER TWELVE FULL STOP]
1338        => Some("12."),
1339        '\u{247F}' // ⑿  [PARENTHESIZED NUMBER TWELVE]
1340        => Some("(12)"),
1341        '\u{246C}' | // ⑬  [CIRCLED NUMBER THIRTEEN]
1342        '\u{24ED}' // ⓭  [NEGATIVE CIRCLED NUMBER THIRTEEN]
1343        => Some("13"),
1344        '\u{2494}' // ⒔  [NUMBER THIRTEEN FULL STOP]
1345        => Some("13."),
1346        '\u{2480}' // ⒀  [PARENTHESIZED NUMBER THIRTEEN]
1347        => Some("(13)"),
1348        '\u{246D}' | // ⑭  [CIRCLED NUMBER FOURTEEN]
1349        '\u{24EE}' // ⓮  [NEGATIVE CIRCLED NUMBER FOURTEEN]
1350        => Some("14"),
1351        '\u{2495}' // ⒕  [NUMBER FOURTEEN FULL STOP]
1352        => Some("14."),
1353        '\u{2481}' // ⒁  [PARENTHESIZED NUMBER FOURTEEN]
1354        => Some("(14)"),
1355        '\u{246E}' | // ⑮  [CIRCLED NUMBER FIFTEEN]
1356        '\u{24EF}' // ⓯  [NEGATIVE CIRCLED NUMBER FIFTEEN]
1357        => Some("15"),
1358        '\u{2496}' // ⒖  [NUMBER FIFTEEN FULL STOP]
1359        => Some("15."),
1360        '\u{2482}' // ⒂  [PARENTHESIZED NUMBER FIFTEEN]
1361        => Some("(15)"),
1362        '\u{246F}' | // ⑯  [CIRCLED NUMBER SIXTEEN]
1363        '\u{24F0}' // ⓰  [NEGATIVE CIRCLED NUMBER SIXTEEN]
1364        => Some("16"),
1365        '\u{2497}' // ⒗  [NUMBER SIXTEEN FULL STOP]
1366        => Some("16."),
1367        '\u{2483}' // ⒃  [PARENTHESIZED NUMBER SIXTEEN]
1368        => Some("(16)"),
1369        '\u{2470}' | // ⑰  [CIRCLED NUMBER SEVENTEEN]
1370        '\u{24F1}' // ⓱  [NEGATIVE CIRCLED NUMBER SEVENTEEN]
1371        => Some("17"),
1372        '\u{2498}' // ⒘  [NUMBER SEVENTEEN FULL STOP]
1373        => Some("17."),
1374        '\u{2484}' // ⒄  [PARENTHESIZED NUMBER SEVENTEEN]
1375        => Some("(17)"),
1376        '\u{2471}' | // ⑱  [CIRCLED NUMBER EIGHTEEN]
1377        '\u{24F2}' // ⓲  [NEGATIVE CIRCLED NUMBER EIGHTEEN]
1378        => Some("18"),
1379        '\u{2499}' // ⒙  [NUMBER EIGHTEEN FULL STOP]
1380        => Some("18."),
1381        '\u{2485}' // ⒅  [PARENTHESIZED NUMBER EIGHTEEN]
1382        => Some("(18)"),
1383        '\u{2472}' | // ⑲  [CIRCLED NUMBER NINETEEN]
1384        '\u{24F3}' // ⓳  [NEGATIVE CIRCLED NUMBER NINETEEN]
1385        => Some("19"),
1386        '\u{249A}' // ⒚  [NUMBER NINETEEN FULL STOP]
1387        => Some("19."),
1388        '\u{2486}' // ⒆  [PARENTHESIZED NUMBER NINETEEN]
1389        => Some("(19)"),
1390        '\u{2473}' | // ⑳  [CIRCLED NUMBER TWENTY]
1391        '\u{24F4}' // ⓴  [NEGATIVE CIRCLED NUMBER TWENTY]
1392        => Some("20"),
1393        '\u{249B}' // ⒛  [NUMBER TWENTY FULL STOP]
1394        => Some("20."),
1395        '\u{2487}' // ⒇  [PARENTHESIZED NUMBER TWENTY]
1396        => Some("(20)"),
1397        '\u{00AB}' | // «  [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
1398        '\u{00BB}' | // »  [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
1399        '\u{201C}' | // “  [LEFT DOUBLE QUOTATION MARK]
1400        '\u{201D}' | // ”  [RIGHT DOUBLE QUOTATION MARK]
1401        '\u{201E}' | // „  [DOUBLE LOW-9 QUOTATION MARK]
1402        '\u{2033}' | // ″  [DOUBLE PRIME]
1403        '\u{2036}' | // ‶  [REVERSED DOUBLE PRIME]
1404        '\u{275D}' | // ❝  [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
1405        '\u{275E}' | // ❞  [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
1406        '\u{276E}' | // ❮  [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1407        '\u{276F}' | // ❯  [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1408        '\u{FF02}' // "  [FULLWIDTH QUOTATION MARK]
1409        => Some("\""),
1410        '\u{2018}' | // ‘  [LEFT SINGLE QUOTATION MARK]
1411        '\u{2019}' | // ’  [RIGHT SINGLE QUOTATION MARK]
1412        '\u{201A}' | // ‚  [SINGLE LOW-9 QUOTATION MARK]
1413        '\u{201B}' | // ‛  [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
1414        '\u{2032}' | // ′  [PRIME]
1415        '\u{2035}' | // ‵  [REVERSED PRIME]
1416        '\u{2039}' | // ‹  [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
1417        '\u{203A}' | // ›  [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
1418        '\u{275B}' | // ❛  [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
1419        '\u{275C}' | // ❜  [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
1420        '\u{FF07}' // '  [FULLWIDTH APOSTROPHE]
1421        => Some("\'"),
1422        '\u{2010}' | // ‐  [HYPHEN]
1423        '\u{2011}' | // ‑  [NON-BREAKING HYPHEN]
1424        '\u{2012}' | // ‒  [FIGURE DASH]
1425        '\u{2013}' | // –  [EN DASH]
1426        '\u{2014}' | // —  [EM DASH]
1427        '\u{207B}' | // ⁻  [SUPERSCRIPT MINUS]
1428        '\u{208B}' | // ₋  [SUBSCRIPT MINUS]
1429        '\u{FF0D}' // -  [FULLWIDTH HYPHEN-MINUS]
1430        => Some("-"),
1431        '\u{2045}' | // ⁅  [LEFT SQUARE BRACKET WITH QUILL]
1432        '\u{2772}' | // ❲  [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
1433        '\u{FF3B}' // [  [FULLWIDTH LEFT SQUARE BRACKET]
1434        => Some("["),
1435        '\u{2046}' | // ⁆  [RIGHT SQUARE BRACKET WITH QUILL]
1436        '\u{2773}' | // ❳  [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
1437        '\u{FF3D}' // ]  [FULLWIDTH RIGHT SQUARE BRACKET]
1438        => Some("]"),
1439        '\u{207D}' | // ⁽  [SUPERSCRIPT LEFT PARENTHESIS]
1440        '\u{208D}' | // ₍  [SUBSCRIPT LEFT PARENTHESIS]
1441        '\u{2768}' | // ❨  [MEDIUM LEFT PARENTHESIS ORNAMENT]
1442        '\u{276A}' | // ❪  [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
1443        '\u{FF08}' // (  [FULLWIDTH LEFT PARENTHESIS]
1444        => Some("("),
1445        '\u{2E28}' // ⸨  [LEFT DOUBLE PARENTHESIS]
1446        => Some("(("),
1447        '\u{207E}' | // ⁾  [SUPERSCRIPT RIGHT PARENTHESIS]
1448        '\u{208E}' | // ₎  [SUBSCRIPT RIGHT PARENTHESIS]
1449        '\u{2769}' | // ❩  [MEDIUM RIGHT PARENTHESIS ORNAMENT]
1450        '\u{276B}' | // ❫  [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
1451        '\u{FF09}' // )  [FULLWIDTH RIGHT PARENTHESIS]
1452        => Some(")"),
1453        '\u{2E29}' // ⸩  [RIGHT DOUBLE PARENTHESIS]
1454        => Some("))"),
1455        '\u{276C}' | // ❬  [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
1456        '\u{2770}' | // ❰  [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
1457        '\u{FF1C}' // <  [FULLWIDTH LESS-THAN SIGN]
1458        => Some("<"),
1459        '\u{276D}' | // ❭  [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1460        '\u{2771}' | // ❱  [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1461        '\u{FF1E}' // >  [FULLWIDTH GREATER-THAN SIGN]
1462        => Some(">"),
1463        '\u{2774}' | // ❴  [MEDIUM LEFT CURLY BRACKET ORNAMENT]
1464        '\u{FF5B}' // {  [FULLWIDTH LEFT CURLY BRACKET]
1465        => Some("{"),
1466        '\u{2775}' | // ❵  [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
1467        '\u{FF5D}' // }  [FULLWIDTH RIGHT CURLY BRACKET]
1468        => Some("}"),
1469        '\u{207A}' | // ⁺  [SUPERSCRIPT PLUS SIGN]
1470        '\u{208A}' | // ₊  [SUBSCRIPT PLUS SIGN]
1471        '\u{FF0B}' // +  [FULLWIDTH PLUS SIGN]
1472        => Some("+"),
1473        '\u{207C}' | // ⁼  [SUPERSCRIPT EQUALS SIGN]
1474        '\u{208C}' | // ₌  [SUBSCRIPT EQUALS SIGN]
1475        '\u{FF1D}' // =  [FULLWIDTH EQUALS SIGN]
1476        => Some("="),
1477        '\u{FF01}' // !  [FULLWIDTH EXCLAMATION MARK]
1478        => Some("!"),
1479        '\u{203C}' // ‼  [DOUBLE EXCLAMATION MARK]
1480        => Some("!!"),
1481        '\u{2049}' // ⁉  [EXCLAMATION QUESTION MARK]
1482        => Some("!?"),
1483        '\u{FF03}' // #  [FULLWIDTH NUMBER SIGN]
1484        => Some("#"),
1485        '\u{FF04}' // $  [FULLWIDTH DOLLAR SIGN]
1486        => Some("$"),
1487        '\u{2052}' | // ⁒  [COMMERCIAL MINUS SIGN]
1488        '\u{FF05}' // %  [FULLWIDTH PERCENT SIGN]
1489        => Some("%"),
1490        '\u{FF06}' // &  [FULLWIDTH AMPERSAND]
1491        => Some("&"),
1492        '\u{204E}' | // ⁎  [LOW ASTERISK]
1493        '\u{FF0A}' // *  [FULLWIDTH ASTERISK]
1494        => Some("*"),
1495        '\u{FF0C}' // ,  [FULLWIDTH COMMA]
1496        => Some(","),
1497        '\u{FF0E}' // .  [FULLWIDTH FULL STOP]
1498        => Some("."),
1499        '\u{2044}' | // ⁄  [FRACTION SLASH]
1500        '\u{FF0F}' // /  [FULLWIDTH SOLIDUS]
1501        => Some("/"),
1502        '\u{FF1A}' // :  [FULLWIDTH COLON]
1503        => Some(":"),
1504        '\u{204F}' | // ⁏  [REVERSED SEMICOLON]
1505        '\u{FF1B}' // ;  [FULLWIDTH SEMICOLON]
1506        => Some(";"),
1507        '\u{FF1F}' // ?  [FULLWIDTH QUESTION MARK]
1508        => Some("?"),
1509        '\u{2047}' // ⁇  [DOUBLE QUESTION MARK]
1510        => Some("??"),
1511        '\u{2048}' // ⁈  [QUESTION EXCLAMATION MARK]
1512        => Some("?!"),
1513        '\u{FF20}' // @  [FULLWIDTH COMMERCIAL AT]
1514        => Some("@"),
1515        '\u{FF3C}' // \  [FULLWIDTH REVERSE SOLIDUS]
1516        => Some("\\"),
1517        '\u{2038}' | // ‸  [CARET]
1518        '\u{FF3E}' // ^  [FULLWIDTH CIRCUMFLEX ACCENT]
1519        => Some("^"),
1520        '\u{FF3F}' // _  [FULLWIDTH LOW LINE]
1521        => Some("_"),
1522        '\u{2053}' | // ⁓  [SWUNG DASH]
1523        '\u{FF5E}' // ~  [FULLWIDTH TILDE]
1524        => Some("~"),
1525        _ => None
1526    }
1527}
1528
1529// https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java#L187
1530fn to_ascii(text: &str, output: &mut String) {
1531    output.clear();
1532
1533    for c in text.chars() {
1534        if let Some(folded) = fold_non_ascii_char(c) {
1535            output.push_str(folded);
1536        } else {
1537            output.push(c);
1538        }
1539    }
1540}
1541
1542#[cfg(test)]
1543mod tests {
1544    use std::iter;
1545
1546    use super::to_ascii;
1547    use crate::tokenizer::{AsciiFoldingFilter, RawTokenizer, SimpleTokenizer, TextAnalyzer};
1548
1549    #[test]
1550    fn test_ascii_folding() {
1551        assert_eq!(&folding_helper("Ràmon"), &["Ramon"]);
1552        assert_eq!(&folding_helper("accentué"), &["accentue"]);
1553        assert_eq!(&folding_helper("âäàéè"), &["aaaee"]);
1554    }
1555
1556    #[test]
1557    fn test_no_change() {
1558        assert_eq!(&folding_helper("Usagi"), &["Usagi"]);
1559    }
1560
1561    fn folding_helper(text: &str) -> Vec<String> {
1562        let mut tokens = Vec::new();
1563        TextAnalyzer::from(SimpleTokenizer)
1564            .filter(AsciiFoldingFilter)
1565            .token_stream(text)
1566            .process(&mut |token| {
1567                tokens.push(token.text.clone());
1568            });
1569        tokens
1570    }
1571
1572    fn folding_using_raw_tokenizer_helper(text: &str) -> String {
1573        let mut token_stream = TextAnalyzer::from(RawTokenizer)
1574            .filter(AsciiFoldingFilter)
1575            .token_stream(text);
1576        token_stream.advance();
1577        token_stream.token().text.clone()
1578    }
1579
1580    #[test]
1581    fn test_latin1_characters() {
1582        let latin1_string = "Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ
1583                   Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij
1584                   ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl";
1585        let mut vec: Vec<&str> = vec!["Des", "mot", "cles", "A", "LA", "CHAINE"];
1586        vec.extend(iter::repeat("A").take(6));
1587        vec.extend(iter::repeat("AE").take(1));
1588        vec.extend(iter::repeat("C").take(1));
1589        vec.extend(iter::repeat("E").take(4));
1590        vec.extend(iter::repeat("I").take(4));
1591        vec.extend(iter::repeat("IJ").take(1));
1592        vec.extend(iter::repeat("D").take(1));
1593        vec.extend(iter::repeat("N").take(1));
1594        vec.extend(iter::repeat("O").take(6));
1595        vec.extend(iter::repeat("OE").take(1));
1596        vec.extend(iter::repeat("TH").take(1));
1597        vec.extend(iter::repeat("U").take(4));
1598        vec.extend(iter::repeat("Y").take(2));
1599        vec.extend(iter::repeat("a").take(6));
1600        vec.extend(iter::repeat("ae").take(1));
1601        vec.extend(iter::repeat("c").take(1));
1602        vec.extend(iter::repeat("e").take(4));
1603        vec.extend(iter::repeat("i").take(4));
1604        vec.extend(iter::repeat("ij").take(1));
1605        vec.extend(iter::repeat("d").take(1));
1606        vec.extend(iter::repeat("n").take(1));
1607        vec.extend(iter::repeat("o").take(6));
1608        vec.extend(iter::repeat("oe").take(1));
1609        vec.extend(iter::repeat("ss").take(1));
1610        vec.extend(iter::repeat("th").take(1));
1611        vec.extend(iter::repeat("u").take(4));
1612        vec.extend(iter::repeat("y").take(2));
1613        vec.extend(iter::repeat("fi").take(1));
1614        vec.extend(iter::repeat("fl").take(1));
1615        assert_eq!(folding_helper(latin1_string), vec);
1616    }
1617
1618    #[test]
1619    fn test_unmodified_letters() {
1620        assert_eq!(
1621            folding_using_raw_tokenizer_helper("§ ¦ ¤ END"),
1622            "§ ¦ ¤ END".to_string()
1623        );
1624    }
1625
1626    #[test]
1627    fn test_to_ascii() {
1628        let input = "Rámon".to_string();
1629        let mut buffer = String::new();
1630        to_ascii(&input, &mut buffer);
1631        assert_eq!("Ramon", buffer);
1632    }
1633
1634    #[test]
1635    fn test_all_foldings() {
        // these foldings are a copy of
1637        // https://github.com/apache/lucene-solr/blob/28d187acd1e391723eb6e1b5445f22abf5580a80/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
1638        // useful regex to adapt to a Rust structure:
1639        // 1. Preg and replace folded:
1640        //    - **REGEX** |,"(.){3,5}", // Folded result|
1641        //    - **REPLACEMENT** ], "$1".to_string(), ), ( vec![
1642        // 2. Preg and replace characters:
1643        //    - **REGEX** |[\+]{0,1} "(.{1,3})"  // U\+|
1644        //    - **REPLACEMENT** "$1",  // U+
1645        let foldings: Vec<(&[&str], &str)> = vec![
1646            (
1647                &[
1648                    "À",  // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE
1649                    "Á",  // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE
1650                    "Â",  // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
1651                    "Ã",  // U+00C3: LATIN CAPITAL LETTER A WITH TILDE
1652                    "Ä",  // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS
1653                    "Å",  // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE
1654                    "Ā",  // U+0100: LATIN CAPITAL LETTER A WITH MACRON
1655                    "Ă",  // U+0102: LATIN CAPITAL LETTER A WITH BREVE
1656                    "Ą",  // U+0104: LATIN CAPITAL LETTER A WITH OGONEK
1657                    "Ə",  // U+018F: LATIN CAPITAL LETTER SCHWA
1658                    "Ǎ",  // U+01CD: LATIN CAPITAL LETTER A WITH CARON
1659                    "Ǟ",  // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
1660                    "Ǡ",  // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
1661                    "Ǻ",  // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
1662                    "Ȁ",  // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
1663                    "Ȃ",  // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE
1664                    "Ȧ",  // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE
1665                    "Ⱥ",  // U+023A: LATIN CAPITAL LETTER A WITH STROKE
1666                    "ᴀ", // U+1D00: LATIN LETTER SMALL CAPITAL A
1667                    "Ḁ", // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW
1668                    "Ạ", // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW
1669                    "Ả", // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE
1670                    "Ấ", // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1671                    "Ầ", // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1672                    "Ẩ", // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1673                    "Ẫ", // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1674                    "Ậ", // U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1675                    "Ắ", // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1676                    "Ằ", // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1677                    "Ẳ", // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1678                    "Ẵ", // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1679                    "Ặ", // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1680                    "Ⓐ", // U+24B6: CIRCLED LATIN CAPITAL LETTER A
1681                    "A", // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A
1682                ],
1683                "A",
1684            ),
1685            (
1686                &[
1687                    "à",  // U+00E0: LATIN SMALL LETTER A WITH GRAVE
1688                    "á",  // U+00E1: LATIN SMALL LETTER A WITH ACUTE
1689                    "â",  // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
1690                    "ã",  // U+00E3: LATIN SMALL LETTER A WITH TILDE
1691                    "ä",  // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS
1692                    "å",  // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE
1693                    "ā",  // U+0101: LATIN SMALL LETTER A WITH MACRON
1694                    "ă",  // U+0103: LATIN SMALL LETTER A WITH BREVE
1695                    "ą",  // U+0105: LATIN SMALL LETTER A WITH OGONEK
1696                    "ǎ",  // U+01CE: LATIN SMALL LETTER A WITH CARON
1697                    "ǟ",  // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
1698                    "ǡ",  // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
1699                    "ǻ",  // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
1700                    "ȁ",  // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE
1701                    "ȃ",  // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE
1702                    "ȧ",  // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE
1703                    "ɐ",  // U+0250: LATIN SMALL LETTER TURNED A
1704                    "ə",  // U+0259: LATIN SMALL LETTER SCHWA
1705                    "ɚ",  // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK
1706                    "ᶏ", // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK
1707                    "ḁ", // U+1E01: LATIN SMALL LETTER A WITH RING BELOW
1708                    "ᶕ", // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK
1709                    "ẚ", // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING
1710                    "ạ", // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW
1711                    "ả", // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE
1712                    "ấ", // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
1713                    "ầ", // U+1EA7: LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
1714                    "ẩ", // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1715                    "ẫ", // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
1716                    "ậ", // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1717                    "ắ", // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE
1718                    "ằ", // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE
1719                    "ẳ", // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
1720                    "ẵ", // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE
1721                    "ặ", // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
1722                    "ₐ", // U+2090: LATIN SUBSCRIPT SMALL LETTER A
1723                    "ₔ", // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA
1724                    "ⓐ", // U+24D0: CIRCLED LATIN SMALL LETTER A
1725                    "ⱥ", // U+2C65: LATIN SMALL LETTER A WITH STROKE
1726                    "Ɐ", // U+2C6F: LATIN CAPITAL LETTER TURNED A
1727                    "a", // U+FF41: FULLWIDTH LATIN SMALL LETTER A
1728                ],
1729                "a",
1730            ),
1731            (
1732                &[
1733                    "Ꜳ", // U+A732: LATIN CAPITAL LETTER AA
1734                ],
1735                "AA",
1736            ),
1737            (
1738                &[
1739                    "Æ",  // U+00C6: LATIN CAPITAL LETTER AE
1740                    "Ǣ",  // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON
1741                    "Ǽ",  // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE
1742                    "ᴁ", // U+1D01: LATIN LETTER SMALL CAPITAL AE
1743                ],
1744                "AE",
1745            ),
1746            (
1747                &[
1748                    "Ꜵ", // U+A734: LATIN CAPITAL LETTER AO
1749                ],
1750                "AO",
1751            ),
1752            (
1753                &[
1754                    "Ꜷ", // U+A736: LATIN CAPITAL LETTER AU
1755                ],
1756                "AU",
1757            ),
1758            (
1759                &[
1760                    "Ꜹ", // U+A738: LATIN CAPITAL LETTER AV
1761                    "Ꜻ", // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
1762                ],
1763                "AV",
1764            ),
1765            (
1766                &[
1767                    "Ꜽ", // U+A73C: LATIN CAPITAL LETTER AY
1768                ],
1769                "AY",
1770            ),
1771            (
1772                &[
1773                    "⒜", // U+249C: PARENTHESIZED LATIN SMALL LETTER A
1774                ],
1775                "(a)",
1776            ),
1777            (
1778                &[
1779                    "ꜳ", // U+A733: LATIN SMALL LETTER AA
1780                ],
1781                "aa",
1782            ),
1783            (
1784                &[
1785                    "æ",  // U+00E6: LATIN SMALL LETTER AE
1786                    "ǣ",  // U+01E3: LATIN SMALL LETTER AE WITH MACRON
1787                    "ǽ",  // U+01FD: LATIN SMALL LETTER AE WITH ACUTE
1788                    "ᴂ", // U+1D02: LATIN SMALL LETTER TURNED AE
1789                ],
1790                "ae",
1791            ),
1792            (
1793                &[
1794                    "ꜵ", // U+A735: LATIN SMALL LETTER AO
1795                ],
1796                "ao",
1797            ),
1798            (
1799                &[
1800                    "ꜷ", // U+A737: LATIN SMALL LETTER AU
1801                ],
1802                "au",
1803            ),
1804            (
1805                &[
1806                    "ꜹ", // U+A739: LATIN SMALL LETTER AV
1807                    "ꜻ", // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR
1808                ],
1809                "av",
1810            ),
1811            (
1812                &[
1813                    "ꜽ", // U+A73D: LATIN SMALL LETTER AY
1814                ],
1815                "ay",
1816            ),
1817            (
1818                &[
1819                    "Ɓ",  // U+0181: LATIN CAPITAL LETTER B WITH HOOK
1820                    "Ƃ",  // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR
1821                    "Ƀ",  // U+0243: LATIN CAPITAL LETTER B WITH STROKE
1822                    "ʙ",  // U+0299: LATIN LETTER SMALL CAPITAL B
1823                    "ᴃ", // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B
1824                    "Ḃ", // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE
1825                    "Ḅ", // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW
1826                    "Ḇ", // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW
1827                    "Ⓑ", // U+24B7: CIRCLED LATIN CAPITAL LETTER B
1828                    "B", // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B
1829                ],
1830                "B",
1831            ),
1832            (
1833                &[
1834                    "ƀ",  // U+0180: LATIN SMALL LETTER B WITH STROKE
1835                    "ƃ",  // U+0183: LATIN SMALL LETTER B WITH TOPBAR
1836                    "ɓ",  // U+0253: LATIN SMALL LETTER B WITH HOOK
1837                    "ᵬ", // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE
1838                    "ᶀ", // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK
1839                    "ḃ", // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE
1840                    "ḅ", // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW
1841                    "ḇ", // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW
1842                    "ⓑ", // U+24D1: CIRCLED LATIN SMALL LETTER B
1843                    "b", // U+FF42: FULLWIDTH LATIN SMALL LETTER B
1844                ],
1845                "b",
1846            ),
1847            (
1848                &[
1849                    "⒝", // U+249D: PARENTHESIZED LATIN SMALL LETTER B
1850                ],
1851                "(b)",
1852            ),
1853            (
1854                &[
1855                    "Ç",  // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA
1856                    "Ć",  // U+0106: LATIN CAPITAL LETTER C WITH ACUTE
1857                    "Ĉ",  // U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX
1858                    "Ċ",  // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE
1859                    "Č",  // U+010C: LATIN CAPITAL LETTER C WITH CARON
1860                    "Ƈ",  // U+0187: LATIN CAPITAL LETTER C WITH HOOK
1861                    "Ȼ",  // U+023B: LATIN CAPITAL LETTER C WITH STROKE
1862                    "ʗ",  // U+0297: LATIN LETTER STRETCHED C
1863                    "ᴄ", // U+1D04: LATIN LETTER SMALL CAPITAL C
1864                    "Ḉ", // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1865                    "Ⓒ", // U+24B8: CIRCLED LATIN CAPITAL LETTER C
1866                    "C", // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C
1867                ],
1868                "C",
1869            ),
1870            (
1871                &[
1872                    "ç",  // U+00E7: LATIN SMALL LETTER C WITH CEDILLA
1873                    "ć",  // U+0107: LATIN SMALL LETTER C WITH ACUTE
1874                    "ĉ",  // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX
1875                    "ċ",  // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE
1876                    "č",  // U+010D: LATIN SMALL LETTER C WITH CARON
1877                    "ƈ",  // U+0188: LATIN SMALL LETTER C WITH HOOK
1878                    "ȼ",  // U+023C: LATIN SMALL LETTER C WITH STROKE
1879                    "ɕ",  // U+0255: LATIN SMALL LETTER C WITH CURL
1880                    "ḉ", // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
1881                    "ↄ", // U+2184: LATIN SMALL LETTER REVERSED C
1882                    "ⓒ", // U+24D2: CIRCLED LATIN SMALL LETTER C
1883                    "Ꜿ", // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT
1884                    "ꜿ", // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT
1885                    "c", // U+FF43: FULLWIDTH LATIN SMALL LETTER C
1886                ],
1887                "c",
1888            ),
1889            (
1890                &[
1891                    "⒞", // U+249E: PARENTHESIZED LATIN SMALL LETTER C
1892                ],
1893                "(c)",
1894            ),
1895            (
1896                &[
1897                    "Ð",  // U+00D0: LATIN CAPITAL LETTER ETH
1898                    "Ď",  // U+010E: LATIN CAPITAL LETTER D WITH CARON
1899                    "Đ",  // U+0110: LATIN CAPITAL LETTER D WITH STROKE
1900                    "Ɖ",  // U+0189: LATIN CAPITAL LETTER AFRICAN D
1901                    "Ɗ",  // U+018A: LATIN CAPITAL LETTER D WITH HOOK
1902                    "Ƌ",  // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR
1903                    "ᴅ", // U+1D05: LATIN LETTER SMALL CAPITAL D
1904                    "ᴆ", // U+1D06: LATIN LETTER SMALL CAPITAL ETH
1905                    "Ḋ", // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE
1906                    "Ḍ", // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW
1907                    "Ḏ", // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW
1908                    "Ḑ", // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA
1909                    "Ḓ", // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1910                    "Ⓓ", // U+24B9: CIRCLED LATIN CAPITAL LETTER D
1911                    "Ꝺ", // U+A779: LATIN CAPITAL LETTER INSULAR D
1912                    "D", // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D
1913                ],
1914                "D",
1915            ),
1916            (
1917                &[
1918                    "ð",  // U+00F0: LATIN SMALL LETTER ETH
1919                    "ď",  // U+010F: LATIN SMALL LETTER D WITH CARON
1920                    "đ",  // U+0111: LATIN SMALL LETTER D WITH STROKE
1921                    "ƌ",  // U+018C: LATIN SMALL LETTER D WITH TOPBAR
1922                    "ȡ",  // U+0221: LATIN SMALL LETTER D WITH CURL
1923                    "ɖ",  // U+0256: LATIN SMALL LETTER D WITH TAIL
1924                    "ɗ",  // U+0257: LATIN SMALL LETTER D WITH HOOK
1925                    "ᵭ", // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE
1926                    "ᶁ", // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK
1927                    "ᶑ", // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL
1928                    "ḋ", // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE
1929                    "ḍ", // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW
1930                    "ḏ", // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW
1931                    "ḑ", // U+1E11: LATIN SMALL LETTER D WITH CEDILLA
1932                    "ḓ", // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
1933                    "ⓓ", // U+24D3: CIRCLED LATIN SMALL LETTER D
1934                    "ꝺ", // U+A77A: LATIN SMALL LETTER INSULAR D
1935                    "d", // U+FF44: FULLWIDTH LATIN SMALL LETTER D
1936                ],
1937                "d",
1938            ),
1939            (
1940                &[
1941                    "DŽ", // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON
1942                    "DZ", // U+01F1: LATIN CAPITAL LETTER DZ
1943                ],
1944                "DZ",
1945            ),
1946            (
1947                &[
1948                    "Dž", // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
1949                    "Dz", // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z
1950                ],
1951                "Dz",
1952            ),
1953            (
1954                &[
1955                    "⒟", // U+249F: PARENTHESIZED LATIN SMALL LETTER D
1956                ],
1957                "(d)",
1958            ),
1959            (
1960                &[
1961                    "ȸ", // U+0238: LATIN SMALL LETTER DB DIGRAPH
1962                ],
1963                "db",
1964            ),
1965            (
1966                &[
1967                    "dž", // U+01C6: LATIN SMALL LETTER DZ WITH CARON
1968                    "dz", // U+01F3: LATIN SMALL LETTER DZ
1969                    "ʣ", // U+02A3: LATIN SMALL LETTER DZ DIGRAPH
1970                    "ʥ", // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL
1971                ],
1972                "dz",
1973            ),
1974            (
1975                &[
1976                    "È",  // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE
1977                    "É",  // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE
1978                    "Ê",  // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
1979                    "Ë",  // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS
1980                    "Ē",  // U+0112: LATIN CAPITAL LETTER E WITH MACRON
1981                    "Ĕ",  // U+0114: LATIN CAPITAL LETTER E WITH BREVE
1982                    "Ė",  // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE
1983                    "Ę",  // U+0118: LATIN CAPITAL LETTER E WITH OGONEK
1984                    "Ě",  // U+011A: LATIN CAPITAL LETTER E WITH CARON
1985                    "Ǝ",  // U+018E: LATIN CAPITAL LETTER REVERSED E
1986                    "Ɛ",  // U+0190: LATIN CAPITAL LETTER OPEN E
1987                    "Ȅ",  // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
1988                    "Ȇ",  // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE
1989                    "Ȩ",  // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA
1990                    "Ɇ",  // U+0246: LATIN CAPITAL LETTER E WITH STROKE
1991                    "ᴇ", // U+1D07: LATIN LETTER SMALL CAPITAL E
1992                    "Ḕ", // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1993                    "Ḗ", // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1994                    "Ḙ", // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1995                    "Ḛ", // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW
1996                    "Ḝ", // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1997                    "Ẹ", // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW
1998                    "Ẻ", // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE
1999                    "Ẽ", // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE
2000                    "Ế", // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
2001                    "Ề", // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
2002                    "Ể", // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
2003                    "Ễ", // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
2004                    "Ệ", // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
2005                    "Ⓔ", // U+24BA: CIRCLED LATIN CAPITAL LETTER E
2006                    "ⱻ", // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E
2007                    "E", // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E
2008                ],
2009                "E",
2010            ),
2011            (
2012                &[
2013                    "è",  // U+00E8: LATIN SMALL LETTER E WITH GRAVE
2014                    "é",  // U+00E9: LATIN SMALL LETTER E WITH ACUTE
2015                    "ê",  // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX
2016                    "ë",  // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS
2017                    "ē",  // U+0113: LATIN SMALL LETTER E WITH MACRON
2018                    "ĕ",  // U+0115: LATIN SMALL LETTER E WITH BREVE
2019                    "ė",  // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE
2020                    "ę",  // U+0119: LATIN SMALL LETTER E WITH OGONEK
2021                    "ě",  // U+011B: LATIN SMALL LETTER E WITH CARON
2022                    "ǝ",  // U+01DD: LATIN SMALL LETTER TURNED E
2023                    "ȅ",  // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE
2024                    "ȇ",  // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE
2025                    "ȩ",  // U+0229: LATIN SMALL LETTER E WITH CEDILLA
2026                    "ɇ",  // U+0247: LATIN SMALL LETTER E WITH STROKE
2027                    "ɘ",  // U+0258: LATIN SMALL LETTER REVERSED E
2028                    "ɛ",  // U+025B: LATIN SMALL LETTER OPEN E
2029                    "ɜ",  // U+025C: LATIN SMALL LETTER REVERSED OPEN E
2030                    "ɝ",  // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
2031                    "ɞ",  // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E
2032                    "ʚ",  // U+029A: LATIN SMALL LETTER CLOSED OPEN E
2033                    "ᴈ", // U+1D08: LATIN SMALL LETTER TURNED OPEN E
2034                    "ᶒ", // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK
2035                    "ᶓ", // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK
2036                    "ᶔ", // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK
2037                    "ḕ", // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE
2038                    "ḗ", // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE
2039                    "ḙ", // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
2040                    "ḛ", // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW
2041                    "ḝ", // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE
2042                    "ẹ", // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW
2043                    "ẻ", // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE
2044                    "ẽ", // U+1EBD: LATIN SMALL LETTER E WITH TILDE
2045                    "ế", // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
2046                    "ề", // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
2047                    "ể", // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
2048                    "ễ", // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
2049                    "ệ", // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
2050                    "ₑ", // U+2091: LATIN SUBSCRIPT SMALL LETTER E
2051                    "ⓔ", // U+24D4: CIRCLED LATIN SMALL LETTER E
2052                    "ⱸ", // U+2C78: LATIN SMALL LETTER E WITH NOTCH
2053                    "e", // U+FF45: FULLWIDTH LATIN SMALL LETTER E
2054                ],
2055                "e",
2056            ),
2057            (
2058                &[
2059                    "⒠", // U+24A0: PARENTHESIZED LATIN SMALL LETTER E
2060                ],
2061                "(e)",
2062            ),
2063            (
2064                &[
2065                    "Ƒ",  // U+0191: LATIN CAPITAL LETTER F WITH HOOK
2066                    "Ḟ", // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE
2067                    "Ⓕ", // U+24BB: CIRCLED LATIN CAPITAL LETTER F
2068                    "ꜰ", // U+A730: LATIN LETTER SMALL CAPITAL F
2069                    "Ꝼ", // U+A77B: LATIN CAPITAL LETTER INSULAR F
2070                    "ꟻ", // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F
2071                    "F", // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F
2072                ],
2073                "F",
2074            ),
2075            (
2076                &[
2077                    "ƒ",  // U+0192: LATIN SMALL LETTER F WITH HOOK
2078                    "ᵮ", // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE
2079                    "ᶂ", // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK
2080                    "ḟ", // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE
2081                    "ẛ", // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE
2082                    "ⓕ", // U+24D5: CIRCLED LATIN SMALL LETTER F
2083                    "ꝼ", // U+A77C: LATIN SMALL LETTER INSULAR F
2084                    "f", // U+FF46: FULLWIDTH LATIN SMALL LETTER F
2085                ],
2086                "f",
2087            ),
2088            (
2089                &[
2090                    "⒡", // U+24A1: PARENTHESIZED LATIN SMALL LETTER F
2091                ],
2092                "(f)",
2093            ),
2094            (
2095                &[
2096                    "ff", // U+FB00: LATIN SMALL LIGATURE FF
2097                ],
2098                "ff",
2099            ),
2100            (
2101                &[
2102                    "ffi", // U+FB03: LATIN SMALL LIGATURE FFI
2103                ],
2104                "ffi",
2105            ),
2106            (
2107                &[
2108                    "ffl", // U+FB04: LATIN SMALL LIGATURE FFL
2109                ],
2110                "ffl",
2111            ),
2112            (
2113                &[
2114                    "fi", // U+FB01: LATIN SMALL LIGATURE FI
2115                ],
2116                "fi",
2117            ),
2118            (
2119                &[
2120                    "fl", // U+FB02: LATIN SMALL LIGATURE FL
2121                ],
2122                "fl",
2123            ),
2124            (
2125                &[
2126                    "Ĝ",  // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX
2127                    "Ğ",  // U+011E: LATIN CAPITAL LETTER G WITH BREVE
2128                    "Ġ",  // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE
2129                    "Ģ",  // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA
2130                    "Ɠ",  // U+0193: LATIN CAPITAL LETTER G WITH HOOK
2131                    "Ǥ",  // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
2132                    "ǥ",  // U+01E5: LATIN SMALL LETTER G WITH STROKE
2133                    "Ǧ",  // U+01E6: LATIN CAPITAL LETTER G WITH CARON
2134                    "ǧ",  // U+01E7: LATIN SMALL LETTER G WITH CARON
2135                    "Ǵ",  // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE
2136                    "ɢ",  // U+0262: LATIN LETTER SMALL CAPITAL G
2137                    "ʛ",  // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK
2138                    "Ḡ", // U+1E20: LATIN CAPITAL LETTER G WITH MACRON
2139                    "Ⓖ", // U+24BC: CIRCLED LATIN CAPITAL LETTER G
2140                    "Ᵹ", // U+A77D: LATIN CAPITAL LETTER INSULAR G
2141                    "Ꝿ", // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G
2142                    "G", // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G
2143                ],
2144                "G",
2145            ),
2146            (
2147                &[
2148                    "ĝ",  // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX
2149                    "ğ",  // U+011F: LATIN SMALL LETTER G WITH BREVE
2150                    "ġ",  // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE
2151                    "ģ",  // U+0123: LATIN SMALL LETTER G WITH CEDILLA
2152                    "ǵ",  // U+01F5: LATIN SMALL LETTER G WITH ACUTE
2153                    "ɠ",  // U+0260: LATIN SMALL LETTER G WITH HOOK
2154                    "ɡ",  // U+0261: LATIN SMALL LETTER SCRIPT G
2155                    "ᵷ", // U+1D77: LATIN SMALL LETTER TURNED G
2156                    "ᵹ", // U+1D79: LATIN SMALL LETTER INSULAR G
2157                    "ᶃ", // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK
2158                    "ḡ", // U+1E21: LATIN SMALL LETTER G WITH MACRON
2159                    "ⓖ", // U+24D6: CIRCLED LATIN SMALL LETTER G
2160                    "ꝿ", // U+A77F: LATIN SMALL LETTER TURNED INSULAR G
2161                    "g", // U+FF47: FULLWIDTH LATIN SMALL LETTER G
2162                ],
2163                "g",
2164            ),
2165            (
2166                &[
2167                    "⒢", // U+24A2: PARENTHESIZED LATIN SMALL LETTER G
2168                ],
2169                "(g)",
2170            ),
2171            (
2172                &[
2173                    "Ĥ",  // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX
2174                    "Ħ",  // U+0126: LATIN CAPITAL LETTER H WITH STROKE
2175                    "Ȟ",  // U+021E: LATIN CAPITAL LETTER H WITH CARON
2176                    "ʜ",  // U+029C: LATIN LETTER SMALL CAPITAL H
2177                    "Ḣ", // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE
2178                    "Ḥ", // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW
2179                    "Ḧ", // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS
2180                    "Ḩ", // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA
2181                    "Ḫ", // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW
2182                    "Ⓗ", // U+24BD: CIRCLED LATIN CAPITAL LETTER H
2183                    "Ⱨ", // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER
2184                    "Ⱶ", // U+2C75: LATIN CAPITAL LETTER HALF H
2185                    "H", // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H
2186                ],
2187                "H",
2188            ),
2189            (
2190                &[
2191                    "ĥ",  // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX
2192                    "ħ",  // U+0127: LATIN SMALL LETTER H WITH STROKE
2193                    "ȟ",  // U+021F: LATIN SMALL LETTER H WITH CARON
2194                    "ɥ",  // U+0265: LATIN SMALL LETTER TURNED H
2195                    "ɦ",  // U+0266: LATIN SMALL LETTER H WITH HOOK
2196                    "ʮ",  // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK
2197                    "ʯ",  // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
2198                    "ḣ", // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE
2199                    "ḥ", // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW
2200                    "ḧ", // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS
2201                    "ḩ", // U+1E29: LATIN SMALL LETTER H WITH CEDILLA
2202                    "ḫ", // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW
2203                    "ẖ", // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW
2204                    "ⓗ", // U+24D7: CIRCLED LATIN SMALL LETTER H
2205                    "ⱨ", // U+2C68: LATIN SMALL LETTER H WITH DESCENDER
2206                    "ⱶ", // U+2C76: LATIN SMALL LETTER HALF H
2207                    "h", // U+FF48: FULLWIDTH LATIN SMALL LETTER H
2208                ],
2209                "h",
2210            ),
2211            (
2212                &[
2213                    "Ƕ", // U+01F6: LATIN CAPITAL LETTER HWAIR
2214                ],
2215                "HV",
2216            ),
2217            (
2218                &[
2219                    "⒣", // U+24A3: PARENTHESIZED LATIN SMALL LETTER H
2220                ],
2221                "(h)",
2222            ),
2223            (
2224                &[
2225                    "ƕ", // U+0195: LATIN SMALL LETTER HV
2226                ],
2227                "hv",
2228            ),
2229            (
2230                &[
2231                    "Ì",  // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE
2232                    "Í",  // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE
2233                    "Î",  // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
2234                    "Ï",  // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS
2235                    "Ĩ",  // U+0128: LATIN CAPITAL LETTER I WITH TILDE
2236                    "Ī",  // U+012A: LATIN CAPITAL LETTER I WITH MACRON
2237                    "Ĭ",  // U+012C: LATIN CAPITAL LETTER I WITH BREVE
2238                    "Į",  // U+012E: LATIN CAPITAL LETTER I WITH OGONEK
2239                    "İ",  // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE
2240                    "Ɩ",  // U+0196: LATIN CAPITAL LETTER IOTA
2241                    "Ɨ",  // U+0197: LATIN CAPITAL LETTER I WITH STROKE
2242                    "Ǐ",  // U+01CF: LATIN CAPITAL LETTER I WITH CARON
2243                    "Ȉ",  // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
2244                    "Ȋ",  // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE
2245                    "ɪ",  // U+026A: LATIN LETTER SMALL CAPITAL I
2246                    "ᵻ", // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE
2247                    "Ḭ", // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW
2248                    "Ḯ", // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
2249                    "Ỉ", // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK ABOVE
2250                    "Ị", // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW
2251                    "Ⓘ", // U+24BE: CIRCLED LATIN CAPITAL LETTER I
2252                    "ꟾ", // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA
2253                    "I", // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I
2254                ],
2255                "I",
2256            ),
2257            (
2258                &[
2259                    "ì",  // U+00EC: LATIN SMALL LETTER I WITH GRAVE
2260                    "í",  // U+00ED: LATIN SMALL LETTER I WITH ACUTE
2261                    "î",  // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
2262                    "ï",  // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS
2263                    "ĩ",  // U+0129: LATIN SMALL LETTER I WITH TILDE
2264                    "ī",  // U+012B: LATIN SMALL LETTER I WITH MACRON
2265                    "ĭ",  // U+012D: LATIN SMALL LETTER I WITH BREVE
2266                    "į",  // U+012F: LATIN SMALL LETTER I WITH OGONEK
2267                    "ı",  // U+0131: LATIN SMALL LETTER DOTLESS I
2268                    "ǐ",  // U+01D0: LATIN SMALL LETTER I WITH CARON
2269                    "ȉ",  // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE
2270                    "ȋ",  // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE
2271                    "ɨ",  // U+0268: LATIN SMALL LETTER I WITH STROKE
2272                    "ᴉ", // U+1D09: LATIN SMALL LETTER TURNED I
2273                    "ᵢ", // U+1D62: LATIN SUBSCRIPT SMALL LETTER I
2274                    "ᵼ", // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE
2275                    "ᶖ", // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK
2276                    "ḭ", // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW
2277                    "ḯ", // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
2278                    "ỉ", // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE
2279                    "ị", // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW
2280                    "ⁱ", // U+2071: SUPERSCRIPT LATIN SMALL LETTER I
2281                    "ⓘ", // U+24D8: CIRCLED LATIN SMALL LETTER I
2282                    "i", // U+FF49: FULLWIDTH LATIN SMALL LETTER I
2283                ],
2284                "i",
2285            ),
2286            (
2287                &[
2288                    "IJ", // U+0132: LATIN CAPITAL LIGATURE IJ
2289                ],
2290                "IJ",
2291            ),
2292            (
2293                &[
2294                    "⒤", // U+24A4: PARENTHESIZED LATIN SMALL LETTER I
2295                ],
2296                "(i)",
2297            ),
2298            (
2299                &[
2300                    "ij", // U+0133: LATIN SMALL LIGATURE IJ
2301                ],
2302                "ij",
2303            ),
2304            (
2305                &[
2306                    "Ĵ",  // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX
2307                    "Ɉ",  // U+0248: LATIN CAPITAL LETTER J WITH STROKE
2308                    "ᴊ", // U+1D0A: LATIN LETTER SMALL CAPITAL J
2309                    "Ⓙ", // U+24BF: CIRCLED LATIN CAPITAL LETTER J
2310                    "J", // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J
2311                ],
2312                "J",
2313            ),
2314            (
2315                &[
2316                    "ĵ",  // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX
2317                    "ǰ",  // U+01F0: LATIN SMALL LETTER J WITH CARON
2318                    "ȷ",  // U+0237: LATIN SMALL LETTER DOTLESS J
2319                    "ɉ",  // U+0249: LATIN SMALL LETTER J WITH STROKE
2320                    "ɟ",  // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE
2321                    "ʄ",  // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
2322                    "ʝ",  // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL
2323                    "ⓙ", // U+24D9: CIRCLED LATIN SMALL LETTER J
2324                    "ⱼ", // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J
2325                    "j", // U+FF4A: FULLWIDTH LATIN SMALL LETTER J
2326                ],
2327                "j",
2328            ),
2329            (
2330                &[
2331                    "⒥", // U+24A5: PARENTHESIZED LATIN SMALL LETTER J
2332                ],
2333                "(j)",
2334            ),
2335            (
2336                &[
2337                    "Ķ",  // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA
2338                    "Ƙ",  // U+0198: LATIN CAPITAL LETTER K WITH HOOK
2339                    "Ǩ",  // U+01E8: LATIN CAPITAL LETTER K WITH CARON
2340                    "ᴋ", // U+1D0B: LATIN LETTER SMALL CAPITAL K
2341                    "Ḱ", // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE
2342                    "Ḳ", // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW
2343                    "Ḵ", // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW
2344                    "Ⓚ", // U+24C0: CIRCLED LATIN CAPITAL LETTER K
2345                    "Ⱪ", // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER
2346                    "Ꝁ", // U+A740: LATIN CAPITAL LETTER K WITH STROKE
2347                    "Ꝃ", // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
2348                    "Ꝅ", // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
2349                    "K", // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K
2350                ],
2351                "K",
2352            ),
2353            (
2354                &[
2355                    "ķ",  // U+0137: LATIN SMALL LETTER K WITH CEDILLA
2356                    "ƙ",  // U+0199: LATIN SMALL LETTER K WITH HOOK
2357                    "ǩ",  // U+01E9: LATIN SMALL LETTER K WITH CARON
2358                    "ʞ",  // U+029E: LATIN SMALL LETTER TURNED K
2359                    "ᶄ", // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK
2360                    "ḱ", // U+1E31: LATIN SMALL LETTER K WITH ACUTE
2361                    "ḳ", // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW
2362                    "ḵ", // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW
2363                    "ⓚ", // U+24DA: CIRCLED LATIN SMALL LETTER K
2364                    "ⱪ", // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER
2365                    "ꝁ", // U+A741: LATIN SMALL LETTER K WITH STROKE
2366                    "ꝃ", // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE
2367                    "ꝅ", // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
2368                    "k", // U+FF4B: FULLWIDTH LATIN SMALL LETTER K
2369                ],
2370                "k",
2371            ),
2372            (
2373                &[
2374                    "⒦", // U+24A6: PARENTHESIZED LATIN SMALL LETTER K
2375                ],
2376                "(k)",
2377            ),
2378            (
2379                &[
2380                    "Ĺ",  // U+0139: LATIN CAPITAL LETTER L WITH ACUTE
2381                    "Ļ",  // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA
2382                    "Ľ",  // U+013D: LATIN CAPITAL LETTER L WITH CARON
2383                    "Ŀ",  // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT
2384                    "Ł",  // U+0141: LATIN CAPITAL LETTER L WITH STROKE
2385                    "Ƚ",  // U+023D: LATIN CAPITAL LETTER L WITH BAR
2386                    "ʟ",  // U+029F: LATIN LETTER SMALL CAPITAL L
2387                    "ᴌ", // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE
2388                    "Ḷ", // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW
2389                    "Ḹ", // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
2390                    "Ḻ", // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW
2391                    "Ḽ", // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
2392                    "Ⓛ", // U+24C1: CIRCLED LATIN CAPITAL LETTER L
2393                    "Ⱡ", // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR
2394                    "Ɫ", // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE
2395                    "Ꝇ", // U+A746: LATIN CAPITAL LETTER BROKEN L
2396                    "Ꝉ", // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE
2397                    "Ꞁ", // U+A780: LATIN CAPITAL LETTER TURNED L
2398                    "L", // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L
2399                ],
2400                "L",
2401            ),
2402            (
2403                &[
2404                    "ĺ",  // U+013A: LATIN SMALL LETTER L WITH ACUTE
2405                    "ļ",  // U+013C: LATIN SMALL LETTER L WITH CEDILLA
2406                    "ľ",  // U+013E: LATIN SMALL LETTER L WITH CARON
2407                    "ŀ",  // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT
2408                    "ł",  // U+0142: LATIN SMALL LETTER L WITH STROKE
2409                    "ƚ",  // U+019A: LATIN SMALL LETTER L WITH BAR
2410                    "ȴ",  // U+0234: LATIN SMALL LETTER L WITH CURL
2411                    "ɫ",  // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE
2412                    "ɬ",  // U+026C: LATIN SMALL LETTER L WITH BELT
2413                    "ɭ",  // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK
2414                    "ᶅ", // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK
2415                    "ḷ", // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW
2416                    "ḹ", // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
2417                    "ḻ", // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW
2418                    "ḽ", // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
2419                    "ⓛ", // U+24DB: CIRCLED LATIN SMALL LETTER L
2420                    "ⱡ", // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR
2421                    "ꝇ", // U+A747: LATIN SMALL LETTER BROKEN L
2422                    "ꝉ", // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE
2423                    "ꞁ", // U+A781: LATIN SMALL LETTER TURNED L
2424                    "l", // U+FF4C: FULLWIDTH LATIN SMALL LETTER L
2425                ],
2426                "l",
2427            ),
2428            (
2429                &[
2430                    "LJ", // U+01C7: LATIN CAPITAL LETTER LJ
2431                ],
2432                "LJ",
2433            ),
2434            (
2435                &[
2436                    "Ỻ", // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL
2437                ],
2438                "LL",
2439            ),
2440            (
2441                &[
2442                    "Lj", // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J
2443                ],
2444                "Lj",
2445            ),
2446            (
2447                &[
2448                    "⒧", // U+24A7: PARENTHESIZED LATIN SMALL LETTER L
2449                ],
2450                "(l)",
2451            ),
2452            (
2453                &[
2454                    "lj", // U+01C9: LATIN SMALL LETTER LJ
2455                ],
2456                "lj",
2457            ),
2458            (
2459                &[
2460                    "ỻ", // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL
2461                ],
2462                "ll",
2463            ),
2464            (
2465                &[
2466                    "ʪ", // U+02AA: LATIN SMALL LETTER LS DIGRAPH
2467                ],
2468                "ls",
2469            ),
2470            (
2471                &[
2472                    "ʫ", // U+02AB: LATIN SMALL LETTER LZ DIGRAPH
2473                ],
2474                "lz",
2475            ),
2476            (
2477                &[
2478                    "Ɯ",  // U+019C: LATIN CAPITAL LETTER TURNED M
2479                    "ᴍ", // U+1D0D: LATIN LETTER SMALL CAPITAL M
2480                    "Ḿ", // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE
2481                    "Ṁ", // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE
2482                    "Ṃ", // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW
2483                    "Ⓜ", // U+24C2: CIRCLED LATIN CAPITAL LETTER M
2484                    "Ɱ", // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK
2485                    "ꟽ", // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M
2486                    "ꟿ", // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M
2487                    "M", // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M
2488                ],
2489                "M",
2490            ),
2491            (
2492                &[
2493                    "ɯ",  // U+026F: LATIN SMALL LETTER TURNED M
2494                    "ɰ",  // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG
2495                    "ɱ",  // U+0271: LATIN SMALL LETTER M WITH HOOK
2496                    "ᵯ", // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE
2497                    "ᶆ", // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK
2498                    "ḿ", // U+1E3F: LATIN SMALL LETTER M WITH ACUTE
2499                    "ṁ", // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE
2500                    "ṃ", // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW
2501                    "ⓜ", // U+24DC: CIRCLED LATIN SMALL LETTER M
2502                    "m", // U+FF4D: FULLWIDTH LATIN SMALL LETTER M
2503                ],
2504                "m",
2505            ),
2506            (
2507                &[
2508                    "⒨", // U+24A8: PARENTHESIZED LATIN SMALL LETTER M
2509                ],
2510                "(m)",
2511            ),
2512            (
2513                &[
2514                    "Ñ",  // U+00D1: LATIN CAPITAL LETTER N WITH TILDE
2515                    "Ń",  // U+0143: LATIN CAPITAL LETTER N WITH ACUTE
2516                    "Ņ",  // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA
2517                    "Ň",  // U+0147: LATIN CAPITAL LETTER N WITH CARON
2518                    "Ŋ",  // U+014A: LATIN CAPITAL LETTER ENG
2519                    "Ɲ",  // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK
2520                    "Ǹ",  // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE
2521                    "Ƞ",  // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
2522                    "ɴ",  // U+0274: LATIN LETTER SMALL CAPITAL N
2523                    "ᴎ", // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N
2524                    "Ṅ", // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE
2525                    "Ṇ", // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW
2526                    "Ṉ", // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW
2527                    "Ṋ", // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
2528                    "Ⓝ", // U+24C3: CIRCLED LATIN CAPITAL LETTER N
2529                    "N", // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N
2530                ],
2531                "N",
2532            ),
2533            (
2534                &[
2535                    "ñ",  // U+00F1: LATIN SMALL LETTER N WITH TILDE
2536                    "ń",  // U+0144: LATIN SMALL LETTER N WITH ACUTE
2537                    "ņ",  // U+0146: LATIN SMALL LETTER N WITH CEDILLA
2538                    "ň",  // U+0148: LATIN SMALL LETTER N WITH CARON
2539                    "ʼn",  // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
2540                    "ŋ",  // U+014B: LATIN SMALL LETTER ENG
2541                    "ƞ",  // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG
2542                    "ǹ",  // U+01F9: LATIN SMALL LETTER N WITH GRAVE
2543                    "ȵ",  // U+0235: LATIN SMALL LETTER N WITH CURL
2544                    "ɲ",  // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK
2545                    "ɳ",  // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK
2546                    "ᵰ", // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE
2547                    "ᶇ", // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK
2548                    "ṅ", // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE
2549                    "ṇ", // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW
2550                    "ṉ", // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW
2551                    "ṋ", // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
2552                    "ⁿ", // U+207F: SUPERSCRIPT LATIN SMALL LETTER N
2553                    "ⓝ", // U+24DD: CIRCLED LATIN SMALL LETTER N
2554                    "n", // U+FF4E: FULLWIDTH LATIN SMALL LETTER N
2555                ],
2556                "n",
2557            ),
2558            (
2559                &[
2560                    "NJ", // U+01CA: LATIN CAPITAL LETTER NJ
2561                ],
2562                "NJ",
2563            ),
2564            (
2565                &[
2566                    "Nj", // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J
2567                ],
2568                "Nj",
2569            ),
2570            (
2571                &[
2572                    "⒩", // U+24A9: PARENTHESIZED LATIN SMALL LETTER N
2573                ],
2574                "(n)",
2575            ),
2576            (
2577                &[
2578                    "nj", // U+01CC: LATIN SMALL LETTER NJ
2579                ],
2580                "nj",
2581            ),
2582            (
2583                &[
2584                    "Ò",  // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE
2585                    "Ó",  // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE
2586                    "Ô",  // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
2587                    "Õ",  // U+00D5: LATIN CAPITAL LETTER O WITH TILDE
2588                    "Ö",  // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS
2589                    "Ø",  // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
2590                    "Ō",  // U+014C: LATIN CAPITAL LETTER O WITH MACRON
2591                    "Ŏ",  // U+014E: LATIN CAPITAL LETTER O WITH BREVE
2592                    "Ő",  // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
2593                    "Ɔ",  // U+0186: LATIN CAPITAL LETTER OPEN O
2594                    "Ɵ",  // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE
2595                    "Ơ",  // U+01A0: LATIN CAPITAL LETTER O WITH HORN
2596                    "Ǒ",  // U+01D1: LATIN CAPITAL LETTER O WITH CARON
2597                    "Ǫ",  // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK
2598                    "Ǭ",  // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
2599                    "Ǿ",  // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
2600                    "Ȍ",  // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
2601                    "Ȏ",  // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE
2602                    "Ȫ",  // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
2603                    "Ȭ",  // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON
2604                    "Ȯ",  // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE
2605                    "Ȱ",  // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
2606                    "ᴏ", // U+1D0F: LATIN LETTER SMALL CAPITAL O
2607                    "ᴐ", // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O
2608                    "Ṍ", // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
2609                    "Ṏ", // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
2610                    "Ṑ", // U+1E50: LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
2611                    "Ṓ", // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
2612                    "Ọ", // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW
2613                    "Ỏ", // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE
2614                    "Ố", // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
2615                    "Ồ", // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
2616                    "Ổ", // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
2617                    "Ỗ", // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
2618                    "Ộ", // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
2619                    "Ớ", // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE
2620                    "Ờ", // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE
2621                    "Ở", // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
2622                    "Ỡ", // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE
2623                    "Ợ", // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
2624                    "Ⓞ", // U+24C4: CIRCLED LATIN CAPITAL LETTER O
2625                    "Ꝋ", // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
2626                    "Ꝍ", // U+A74C: LATIN CAPITAL LETTER O WITH LOOP
2627                    "O", // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O
2628                ],
2629                "O",
2630            ),
2631            (
2632                &[
2633                    "ò",  // U+00F2: LATIN SMALL LETTER O WITH GRAVE
2634                    "ó",  // U+00F3: LATIN SMALL LETTER O WITH ACUTE
2635                    "ô",  // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
2636                    "õ",  // U+00F5: LATIN SMALL LETTER O WITH TILDE
2637                    "ö",  // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS
2638                    "ø",  // U+00F8: LATIN SMALL LETTER O WITH STROKE
2639                    "ō",  // U+014D: LATIN SMALL LETTER O WITH MACRON
2640                    "ŏ",  // U+014F: LATIN SMALL LETTER O WITH BREVE
2641                    "ő",  // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE
2642                    "ơ",  // U+01A1: LATIN SMALL LETTER O WITH HORN
2643                    "ǒ",  // U+01D2: LATIN SMALL LETTER O WITH CARON
2644                    "ǫ",  // U+01EB: LATIN SMALL LETTER O WITH OGONEK
2645                    "ǭ",  // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON
2646                    "ǿ",  // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE
2647                    "ȍ",  // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE
2648                    "ȏ",  // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE
2649                    "ȫ",  // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
2650                    "ȭ",  // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON
2651                    "ȯ",  // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE
2652                    "ȱ",  // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
2653                    "ɔ",  // U+0254: LATIN SMALL LETTER OPEN O
2654                    "ɵ",  // U+0275: LATIN SMALL LETTER BARRED O
2655                    "ᴖ", // U+1D16: LATIN SMALL LETTER TOP HALF O
2656                    "ᴗ", // U+1D17: LATIN SMALL LETTER BOTTOM HALF O
2657                    "ᶗ", // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK
2658                    "ṍ", // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE
2659                    "ṏ", // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
2660                    "ṑ", // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE
2661                    "ṓ", // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE
2662                    "ọ", // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW
2663                    "ỏ", // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE
2664                    "ố", // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
2665                    "ồ", // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
2666                    "ổ", // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
2667                    "ỗ", // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
2668                    "ộ", // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
2669                    "ớ", // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE
2670                    "ờ", // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE
2671                    "ở", // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
2672                    "ỡ", // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE
2673                    "ợ", // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW
2674                    "ₒ", // U+2092: LATIN SUBSCRIPT SMALL LETTER O
2675                    "ⓞ", // U+24DE: CIRCLED LATIN SMALL LETTER O
2676                    "ⱺ", // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE
2677                    "ꝋ", // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
2678                    "ꝍ", // U+A74D: LATIN SMALL LETTER O WITH LOOP
2679                    "o", // U+FF4F: FULLWIDTH LATIN SMALL LETTER O
2680                ],
2681                "o",
2682            ),
2683            (
2684                &[
2685                    "Œ", // U+0152: LATIN CAPITAL LIGATURE OE
2686                    "ɶ", // U+0276: LATIN LETTER SMALL CAPITAL OE
2687                ],
2688                "OE",
2689            ),
2690            (
2691                &[
2692                    "Ꝏ", // U+A74E: LATIN CAPITAL LETTER OO
2693                ],
2694                "OO",
2695            ),
2696            (
2697                &[
2698                    "Ȣ",  // U+0222: LATIN CAPITAL LETTER OU
2699                    "ᴕ", // U+1D15: LATIN LETTER SMALL CAPITAL OU
2700                ],
2701                "OU",
2702            ),
2703            (
2704                &[
2705                    "⒪", // U+24AA: PARENTHESIZED LATIN SMALL LETTER O
2706                ],
2707                "(o)",
2708            ),
2709            (
2710                &[
2711                    "œ",  // U+0153: LATIN SMALL LIGATURE OE
2712                    "ᴔ", // U+1D14: LATIN SMALL LETTER TURNED OE
2713                ],
2714                "oe",
2715            ),
2716            (
2717                &[
2718                    "ꝏ", // U+A74F: LATIN SMALL LETTER OO
2719                ],
2720                "oo",
2721            ),
2722            (
2723                &[
2724                    "ȣ", // U+0223: LATIN SMALL LETTER OU
2725                ],
2726                "ou",
2727            ),
2728            (
2729                &[
2730                    "Ƥ",  // U+01A4: LATIN CAPITAL LETTER P WITH HOOK
2731                    "ᴘ", // U+1D18: LATIN LETTER SMALL CAPITAL P
2732                    "Ṕ", // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE
2733                    "Ṗ", // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE
2734                    "Ⓟ", // U+24C5: CIRCLED LATIN CAPITAL LETTER P
2735                    "Ᵽ", // U+2C63: LATIN CAPITAL LETTER P WITH STROKE
2736                    "Ꝑ", // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
2737                    "Ꝓ", // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH
2738                    "Ꝕ", // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
2739                    "P", // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P
2740                ],
2741                "P",
2742            ),
2743            (
2744                &[
2745                    "ƥ",  // U+01A5: LATIN SMALL LETTER P WITH HOOK
2746                    "ᵱ", // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE
2747                    "ᵽ", // U+1D7D: LATIN SMALL LETTER P WITH STROKE
2748                    "ᶈ", // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK
2749                    "ṕ", // U+1E55: LATIN SMALL LETTER P WITH ACUTE
2750                    "ṗ", // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE
2751                    "ⓟ", // U+24DF: CIRCLED LATIN SMALL LETTER P
2752                    "ꝑ", // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
2753                    "ꝓ", // U+A753: LATIN SMALL LETTER P WITH FLOURISH
2754                    "ꝕ", // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL
2755                    "ꟼ", // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P
2756                    "p", // U+FF50: FULLWIDTH LATIN SMALL LETTER P
2757                ],
2758                "p",
2759            ),
2760            (
2761                &[
2762                    "⒫", // U+24AB: PARENTHESIZED LATIN SMALL LETTER P
2763                ],
2764                "(p)",
2765            ),
2766            (
2767                &[
2768                    "Ɋ",  // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
2769                    "Ⓠ", // U+24C6: CIRCLED LATIN CAPITAL LETTER Q
2770                    "Ꝗ", // U+A756: LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
2771                    "Ꝙ", // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
2772                    "Q", // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q
2773                ],
2774                "Q",
2775            ),
2776            (
2777                &[
2778                    "ĸ",  // U+0138: LATIN SMALL LETTER KRA
2779                    "ɋ",  // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL
2780                    "ʠ",  // U+02A0: LATIN SMALL LETTER Q WITH HOOK
2781                    "ⓠ", // U+24E0: CIRCLED LATIN SMALL LETTER Q
2782                    "ꝗ", // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
2783                    "ꝙ", // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE
2784                    "q", // U+FF51: FULLWIDTH LATIN SMALL LETTER Q
2785                ],
2786                "q",
2787            ),
2788            (
2789                &[
2790                    "⒬", // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q
2791                ],
2792                "(q)",
2793            ),
2794            (
2795                &[
2796                    "ȹ", // U+0239: LATIN SMALL LETTER QP DIGRAPH
2797                ],
2798                "qp",
2799            ),
2800            (
2801                &[
2802                    "Ŕ",  // U+0154: LATIN CAPITAL LETTER R WITH ACUTE
2803                    "Ŗ",  // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA
2804                    "Ř",  // U+0158: LATIN CAPITAL LETTER R WITH CARON
2805                    "Ȑ",  // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
2806                    "Ȓ",  // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE
2807                    "Ɍ",  // U+024C: LATIN CAPITAL LETTER R WITH STROKE
2808                    "ʀ",  // U+0280: LATIN LETTER SMALL CAPITAL R
2809                    "ʁ",  // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R
2810                    "ᴙ", // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R
2811                    "ᴚ", // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R
2812                    "Ṙ", // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE
2813                    "Ṛ", // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW
2814                    "Ṝ", // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
2815                    "Ṟ", // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW
2816                    "Ⓡ", // U+24C7: CIRCLED LATIN CAPITAL LETTER R
2817                    "Ɽ", // U+2C64: LATIN CAPITAL LETTER R WITH TAIL
2818                    "Ꝛ", // U+A75A: LATIN CAPITAL LETTER R ROTUNDA
2819                    "Ꞃ", // U+A782: LATIN CAPITAL LETTER INSULAR R
2820                    "R", // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R
2821                ],
2822                "R",
2823            ),
2824            (
2825                &[
2826                    "ŕ",  // U+0155: LATIN SMALL LETTER R WITH ACUTE
2827                    "ŗ",  // U+0157: LATIN SMALL LETTER R WITH CEDILLA
2828                    "ř",  // U+0159: LATIN SMALL LETTER R WITH CARON
2829                    "ȑ",  // U+0211: LATIN SMALL LETTER R WITH DOUBLE GRAVE
2830                    "ȓ",  // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE
2831                    "ɍ",  // U+024D: LATIN SMALL LETTER R WITH STROKE
2832                    "ɼ",  // U+027C: LATIN SMALL LETTER R WITH LONG LEG
2833                    "ɽ",  // U+027D: LATIN SMALL LETTER R WITH TAIL
2834                    "ɾ",  // U+027E: LATIN SMALL LETTER R WITH FISHHOOK
2835                    "ɿ",  // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK
2836                    "ᵣ", // U+1D63: LATIN SUBSCRIPT SMALL LETTER R
2837                    "ᵲ", // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE
2838                    "ᵳ", // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE
2839                    "ᶉ", // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK
2840                    "ṙ", // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE
2841                    "ṛ", // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW
2842                    "ṝ", // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
2843                    "ṟ", // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW
2844                    "ⓡ", // U+24E1: CIRCLED LATIN SMALL LETTER R
2845                    "ꝛ", // U+A75B: LATIN SMALL LETTER R ROTUNDA
2846                    "ꞃ", // U+A783: LATIN SMALL LETTER INSULAR R
2847                    "r", // U+FF52: FULLWIDTH LATIN SMALL LETTER R
2848                ],
2849                "r",
2850            ),
2851            (
2852                &[
2853                    "⒭", // U+24AD: PARENTHESIZED LATIN SMALL LETTER R
2854                ],
2855                "(r)",
2856            ),
2857            (
2858                &[
2859                    "Ś",  // U+015A: LATIN CAPITAL LETTER S WITH ACUTE
2860                    "Ŝ",  // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX
2861                    "Ş",  // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA
2862                    "Š",  // U+0160: LATIN CAPITAL LETTER S WITH CARON
2863                    "Ș",  // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW
2864                    "Ṡ", // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE
2865                    "Ṣ", // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW
2866                    "Ṥ", // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
2867                    "Ṧ", // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
2868                    "Ṩ", // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
2869                    "Ⓢ", // U+24C8: CIRCLED LATIN CAPITAL LETTER S
2870                    "ꜱ", // U+A731: LATIN LETTER SMALL CAPITAL S
2871                    "ꞅ", // U+A785: LATIN SMALL LETTER INSULAR S
2872                    "S", // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S
2873                ],
2874                "S",
2875            ),
2876            (
2877                &[
2878                    "ś",  // U+015B: LATIN SMALL LETTER S WITH ACUTE
2879                    "ŝ",  // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX
2880                    "ş",  // U+015F: LATIN SMALL LETTER S WITH CEDILLA
2881                    "š",  // U+0161: LATIN SMALL LETTER S WITH CARON
2882                    "ſ",  // U+017F: LATIN SMALL LETTER LONG S
2883                    "ș",  // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW
2884                    "ȿ",  // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL
2885                    "ʂ",  // U+0282: LATIN SMALL LETTER S WITH HOOK
2886                    "ᵴ", // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE
2887                    "ᶊ", // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK
2888                    "ṡ", // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE
2889                    "ṣ", // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW
2890                    "ṥ", // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
2891                    "ṧ", // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
2892                    "ṩ", // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
2893                    "ẜ", // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
2894                    "ẝ", // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE
2895                    "ⓢ", // U+24E2: CIRCLED LATIN SMALL LETTER S
2896                    "Ꞅ", // U+A784: LATIN CAPITAL LETTER INSULAR S
2897                    "s", // U+FF53: FULLWIDTH LATIN SMALL LETTER S
2898                ],
2899                "s",
2900            ),
2901            (
2902                &[
2903                    "ẞ", // U+1E9E: LATIN CAPITAL LETTER SHARP S
2904                ],
2905                "SS",
2906            ),
2907            (
2908                &[
2909                    "⒮", // U+24AE: PARENTHESIZED LATIN SMALL LETTER S
2910                ],
2911                "(s)",
2912            ),
2913            (
2914                &[
2915                    "ß", // U+00DF: LATIN SMALL LETTER SHARP S
2916                ],
2917                "ss",
2918            ),
2919            (
2920                &[
2921                    "st", // U+FB06: LATIN SMALL LIGATURE ST
2922                ],
2923                "st",
2924            ),
2925            (
2926                &[
2927                    "Ţ",  // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA
2928                    "Ť",  // U+0164: LATIN CAPITAL LETTER T WITH CARON
2929                    "Ŧ",  // U+0166: LATIN CAPITAL LETTER T WITH STROKE
2930                    "Ƭ",  // U+01AC: LATIN CAPITAL LETTER T WITH HOOK
2931                    "Ʈ",  // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
2932                    "Ț",  // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW
2933                    "Ⱦ",  // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
2934                    "ᴛ", // U+1D1B: LATIN LETTER SMALL CAPITAL T
2935                    "Ṫ", // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE
2936                    "Ṭ", // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW
2937                    "Ṯ", // U+1E6E: LATIN CAPITAL LETTER T WITH LINE BELOW
2938                    "Ṱ", // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
2939                    "Ⓣ", // U+24C9: CIRCLED LATIN CAPITAL LETTER T
2940                    "Ꞇ", // U+A786: LATIN CAPITAL LETTER INSULAR T
2941                    "T", // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T
2942                ],
2943                "T",
2944            ),
2945            (
2946                &[
2947                    "ţ",  // U+0163: LATIN SMALL LETTER T WITH CEDILLA
2948                    "ť",  // U+0165: LATIN SMALL LETTER T WITH CARON
2949                    "ŧ",  // U+0167: LATIN SMALL LETTER T WITH STROKE
2950                    "ƫ",  // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK
2951                    "ƭ",  // U+01AD: LATIN SMALL LETTER T WITH HOOK
2952                    "ț",  // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW
2953                    "ȶ",  // U+0236: LATIN SMALL LETTER T WITH CURL
2954                    "ʇ",  // U+0287: LATIN SMALL LETTER TURNED T
2955                    "ʈ",  // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK
2956                    "ᵵ", // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE
2957                    "ṫ", // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE
2958                    "ṭ", // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW
2959                    "ṯ", // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW
2960                    "ṱ", // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
2961                    "ẗ", // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS
2962                    "ⓣ", // U+24E3: CIRCLED LATIN SMALL LETTER T
2963                    "ⱦ", // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE
2964                    "t", // U+FF54: FULLWIDTH LATIN SMALL LETTER T
2965                ],
2966                "t",
2967            ),
2968            (
2969                &[
2970                    "Þ",  // U+00DE: LATIN CAPITAL LETTER THORN
2971                    "Ꝧ", // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
2972                ],
2973                "TH",
2974            ),
2975            (
2976                &[
2977                    "Ꜩ", // U+A728: LATIN CAPITAL LETTER TZ
2978                ],
2979                "TZ",
2980            ),
2981            (
2982                &[
2983                    "⒯", // U+24AF: PARENTHESIZED LATIN SMALL LETTER T
2984                ],
2985                "(t)",
2986            ),
2987            (
2988                &[
2989                    "ʨ", // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL
2990                ],
2991                "tc",
2992            ),
2993            (
2994                &[
2995                    "þ",  // U+00FE: LATIN SMALL LETTER THORN
2996                    "ᵺ", // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH
2997                    "ꝧ", // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
2998                ],
2999                "th",
3000            ),
3001            (
3002                &[
3003                    "ʦ", // U+02A6: LATIN SMALL LETTER TS DIGRAPH
3004                ],
3005                "ts",
3006            ),
3007            (
3008                &[
3009                    "ꜩ", // U+A729: LATIN SMALL LETTER TZ
3010                ],
3011                "tz",
3012            ),
3013            (
3014                &[
3015                    "Ù",  // U+00D9: LATIN CAPITAL LETTER U WITH GRAVE
3016                    "Ú",  // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE
3017                    "Û",  // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
3018                    "Ü",  // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS
3019                    "Ũ",  // U+0168: LATIN CAPITAL LETTER U WITH TILDE
3020                    "Ū",  // U+016A: LATIN CAPITAL LETTER U WITH MACRON
3021                    "Ŭ",  // U+016C: LATIN CAPITAL LETTER U WITH BREVE
3022                    "Ů",  // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE
3023                    "Ű",  // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
3024                    "Ų",  // U+0172: LATIN CAPITAL LETTER U WITH OGONEK
3025                    "Ư",  // U+01AF: LATIN CAPITAL LETTER U WITH HORN
3026                    "Ǔ",  // U+01D3: LATIN CAPITAL LETTER U WITH CARON
3027                    "Ǖ",  // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
3028                    "Ǘ",  // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
3029                    "Ǚ",  // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
3030                    "Ǜ",  // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
3031                    "Ȕ",  // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
3032                    "Ȗ",  // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE
3033                    "Ʉ",  // U+0244: LATIN CAPITAL LETTER U BAR
3034                    "ᴜ", // U+1D1C: LATIN LETTER SMALL CAPITAL U
3035                    "ᵾ", // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE
3036                    "Ṳ", // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
3037                    "Ṵ", // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW
3038                    "Ṷ", // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
3039                    "Ṹ", // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
3040                    "Ṻ", // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
3041                    "Ụ", // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW
3042                    "Ủ", // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE
3043                    "Ứ", // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE
3044                    "Ừ", // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE
3045                    "Ử", // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
3046                    "Ữ", // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE
3047                    "Ự", // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
3048                    "Ⓤ", // U+24CA: CIRCLED LATIN CAPITAL LETTER U
3049                    "U", // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U
3050                ],
3051                "U",
3052            ),
3053            (
3054                &[
3055                    "ù",  // U+00F9: LATIN SMALL LETTER U WITH GRAVE
3056                    "ú",  // U+00FA: LATIN SMALL LETTER U WITH ACUTE
3057                    "û",  // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX
3058                    "ü",  // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS
3059                    "ũ",  // U+0169: LATIN SMALL LETTER U WITH TILDE
3060                    "ū",  // U+016B: LATIN SMALL LETTER U WITH MACRON
3061                    "ŭ",  // U+016D: LATIN SMALL LETTER U WITH BREVE
3062                    "ů",  // U+016F: LATIN SMALL LETTER U WITH RING ABOVE
3063                    "ű",  // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE
3064                    "ų",  // U+0173: LATIN SMALL LETTER U WITH OGONEK
3065                    "ư",  // U+01B0: LATIN SMALL LETTER U WITH HORN
3066                    "ǔ",  // U+01D4: LATIN SMALL LETTER U WITH CARON
3067                    "ǖ",  // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
3068                    "ǘ",  // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
3069                    "ǚ",  // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON
3070                    "ǜ",  // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
3071                    "ȕ",  // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE
3072                    "ȗ",  // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE
3073                    "ʉ",  // U+0289: LATIN SMALL LETTER U BAR
3074                    "ᵤ", // U+1D64: LATIN SUBSCRIPT SMALL LETTER U
3075                    "ᶙ", // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK
3076                    "ṳ", // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW
3077                    "ṵ", // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW
3078                    "ṷ", // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
3079                    "ṹ", // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE
3080                    "ṻ", // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
3081                    "ụ", // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW
3082                    "ủ", // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE
3083                    "ứ", // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE
3084                    "ừ", // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE
3085                    "ử", // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
3086                    "ữ", // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE
3087                    "ự", // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW
3088                    "ⓤ", // U+24E4: CIRCLED LATIN SMALL LETTER U
3089                    "u", // U+FF55: FULLWIDTH LATIN SMALL LETTER U
3090                ],
3091                "u",
3092            ),
3093            (
3094                &[
3095                    "⒰", // U+24B0: PARENTHESIZED LATIN SMALL LETTER U
3096                ],
3097                "(u)",
3098            ),
3099            (
3100                &[
3101                    "ᵫ", // U+1D6B: LATIN SMALL LETTER UE
3102                ],
3103                "ue",
3104            ),
3105            (
3106                &[
3107                    "Ʋ",  // U+01B2: LATIN CAPITAL LETTER V WITH HOOK
3108                    "Ʌ",  // U+0245: LATIN CAPITAL LETTER TURNED V
3109                    "ᴠ", // U+1D20: LATIN LETTER SMALL CAPITAL V
3110                    "Ṽ", // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE
3111                    "Ṿ", // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW
3112                    "Ỽ", // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V
3113                    "Ⓥ", // U+24CB: CIRCLED LATIN CAPITAL LETTER V
3114                    "Ꝟ", // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
3115                    "Ꝩ", // U+A768: LATIN CAPITAL LETTER VEND
3116                    "V", // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V
3117                ],
3118                "V",
3119            ),
3120            (
3121                &[
3122                    "ʋ",  // U+028B: LATIN SMALL LETTER V WITH HOOK
3123                    "ʌ",  // U+028C: LATIN SMALL LETTER TURNED V
3124                    "ᵥ", // U+1D65: LATIN SUBSCRIPT SMALL LETTER V
3125                    "ᶌ", // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK
3126                    "ṽ", // U+1E7D: LATIN SMALL LETTER V WITH TILDE
3127                    "ṿ", // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW
3128                    "ⓥ", // U+24E5: CIRCLED LATIN SMALL LETTER V
3129                    "ⱱ", // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK
3130                    "ⱴ", // U+2C74: LATIN SMALL LETTER V WITH CURL
3131                    "ꝟ", // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE
3132                    "v", // U+FF56: FULLWIDTH LATIN SMALL LETTER V
3133                ],
3134                "v",
3135            ),
3136            (
3137                &[
3138                    "Ꝡ", // U+A760: LATIN CAPITAL LETTER VY
3139                ],
3140                "VY",
3141            ),
3142            (
3143                &[
3144                    "⒱", // U+24B1: PARENTHESIZED LATIN SMALL LETTER V
3145                ],
3146                "(v)",
3147            ),
3148            (
3149                &[
3150                    "ꝡ", // U+A761: LATIN SMALL LETTER VY
3151                ],
3152                "vy",
3153            ),
3154            (
3155                &[
3156                    "Ŵ",  // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX
3157                    "Ƿ",  // U+01F7: LATIN CAPITAL LETTER WYNN
3158                    "ᴡ", // U+1D21: LATIN LETTER SMALL CAPITAL W
3159                    "Ẁ", // U+1E80: LATIN CAPITAL LETTER W WITH GRAVE
3160                    "Ẃ", // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE
3161                    "Ẅ", // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS
3162                    "Ẇ", // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE
3163                    "Ẉ", // U+1E88: LATIN CAPITAL LETTER W WITH DOT BELOW
3164                    "Ⓦ", // U+24CC: CIRCLED LATIN CAPITAL LETTER W
3165                    "Ⱳ", // U+2C72: LATIN CAPITAL LETTER W WITH HOOK
3166                    "W", // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W
3167                ],
3168                "W",
3169            ),
3170            (
3171                &[
3172                    "ŵ",  // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX
3173                    "ƿ",  // U+01BF: LATIN LETTER WYNN
3174                    "ʍ",  // U+028D: LATIN SMALL LETTER TURNED W
3175                    "ẁ", // U+1E81: LATIN SMALL LETTER W WITH GRAVE
3176                    "ẃ", // U+1E83: LATIN SMALL LETTER W WITH ACUTE
3177                    "ẅ", // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS
3178                    "ẇ", // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE
3179                    "ẉ", // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW
3180                    "ẘ", // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE
3181                    "ⓦ", // U+24E6: CIRCLED LATIN SMALL LETTER W
3182                    "ⱳ", // U+2C73: LATIN SMALL LETTER W WITH HOOK
3183                    "w", // U+FF57: FULLWIDTH LATIN SMALL LETTER W
3184                ],
3185                "w",
3186            ),
3187            (
3188                &[
3189                    "⒲", // U+24B2: PARENTHESIZED LATIN SMALL LETTER W
3190                ],
3191                "(w)",
3192            ),
3193            (
3194                &[
3195                    "Ẋ", // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE
3196                    "Ẍ", // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS
3197                    "Ⓧ", // U+24CD: CIRCLED LATIN CAPITAL LETTER X
3198                    "X", // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X
3199                ],
3200                "X",
3201            ),
3202            (
3203                &[
3204                    "ᶍ", // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK
3205                    "ẋ", // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE
3206                    "ẍ", // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS
3207                    "ₓ", // U+2093: LATIN SUBSCRIPT SMALL LETTER X
3208                    "ⓧ", // U+24E7: CIRCLED LATIN SMALL LETTER X
3209                    "x", // U+FF58: FULLWIDTH LATIN SMALL LETTER X
3210                ],
3211                "x",
3212            ),
3213            (
3214                &[
3215                    "⒳", // U+24B3: PARENTHESIZED LATIN SMALL LETTER X
3216                ],
3217                "(x)",
3218            ),
3219            (
3220                &[
3221                    "Ý",  // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE
3222                    "Ŷ",  // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
3223                    "Ÿ",  // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS
3224                    "Ƴ",  // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK
3225                    "Ȳ",  // U+0232: LATIN CAPITAL LETTER Y WITH MACRON
3226                    "Ɏ",  // U+024E: LATIN CAPITAL LETTER Y WITH STROKE
3227                    "ʏ",  // U+028F: LATIN LETTER SMALL CAPITAL Y
3228                    "Ẏ", // U+1E8E: LATIN CAPITAL LETTER Y WITH DOT ABOVE
3229                    "Ỳ", // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE
3230                    "Ỵ", // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW
3231                    "Ỷ", // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE
3232                    "Ỹ", // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE
3233                    "Ỿ", // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP
3234                    "Ⓨ", // U+24CE: CIRCLED LATIN CAPITAL LETTER Y
3235                    "Y", // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y
3236                ],
3237                "Y",
3238            ),
3239            (
3240                &[
3241                    "ý",  // U+00FD: LATIN SMALL LETTER Y WITH ACUTE
3242                    "ÿ",  // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS
3243                    "ŷ",  // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX
3244                    "ƴ",  // U+01B4: LATIN SMALL LETTER Y WITH HOOK
3245                    "ȳ",  // U+0233: LATIN SMALL LETTER Y WITH MACRON
3246                    "ɏ",  // U+024F: LATIN SMALL LETTER Y WITH STROKE
3247                    "ʎ",  // U+028E: LATIN SMALL LETTER TURNED Y
3248                    "ẏ", // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE
3249                    "ẙ", // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE
3250                    "ỳ", // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE
3251                    "ỵ", // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW
3252                    "ỷ", // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE
3253                    "ỹ", // U+1EF9: LATIN SMALL LETTER Y WITH TILDE
3254                    "ỿ", // U+1EFF: LATIN SMALL LETTER Y WITH LOOP
3255                    "ⓨ", // U+24E8: CIRCLED LATIN SMALL LETTER Y
3256                    "y", // U+FF59: FULLWIDTH LATIN SMALL LETTER Y
3257                ],
3258                "y",
3259            ),
3260            (
3261                &[
3262                    "⒴", // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y
3263                ],
3264                "(y)",
3265            ),
3266            (
3267                &[
3268                    "Ź",  // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE
3269                    "Ż",  // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE
3270                    "Ž",  // U+017D: LATIN CAPITAL LETTER Z WITH CARON
3271                    "Ƶ",  // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE
3272                    "Ȝ",  // U+021C: LATIN CAPITAL LETTER YOGH
3273                    "Ȥ",  // U+0224: LATIN CAPITAL LETTER Z WITH HOOK
3274                    "ᴢ", // U+1D22: LATIN LETTER SMALL CAPITAL Z
3275                    "Ẑ", // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
3276                    "Ẓ", // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW
3277                    "Ẕ", // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW
3278                    "Ⓩ", // U+24CF: CIRCLED LATIN CAPITAL LETTER Z
3279                    "Ⱬ", // U+2C6B: LATIN CAPITAL LETTER Z WITH DESCENDER
3280                    "Ꝣ", // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z
3281                    "Z", // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z
3282                ],
3283                "Z",
3284            ),
3285            (
3286                &[
3287                    "ź",  // U+017A: LATIN SMALL LETTER Z WITH ACUTE
3288                    "ż",  // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE
3289                    "ž",  // U+017E: LATIN SMALL LETTER Z WITH CARON
3290                    "ƶ",  // U+01B6: LATIN SMALL LETTER Z WITH STROKE
3291                    "ȝ",  // U+021D: LATIN SMALL LETTER YOGH
3292                    "ȥ",  // U+0225: LATIN SMALL LETTER Z WITH HOOK
3293                    "ɀ",  // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL
3294                    "ʐ",  // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK
3295                    "ʑ",  // U+0291: LATIN SMALL LETTER Z WITH CURL
3296                    "ᵶ", // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE
3297                    "ᶎ", // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK
3298                    "ẑ", // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX
3299                    "ẓ", // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW
3300                    "ẕ", // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW
3301                    "ⓩ", // U+24E9: CIRCLED LATIN SMALL LETTER Z
3302                    "ⱬ", // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER
3303                    "ꝣ", // U+A763: LATIN SMALL LETTER VISIGOTHIC Z
3304                    "z", // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z
3305                ],
3306                "z",
3307            ),
3308            (
3309                &[
3310                    "⒵", // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z
3311                ],
3312                "(z)",
3313            ),
3314            (
3315                &[
3316                    "⁰", // U+2070: SUPERSCRIPT ZERO
3317                    "₀", // U+2080: SUBSCRIPT ZERO
3318                    "⓪", // U+24EA: CIRCLED DIGIT ZERO
3319                    "⓿", // U+24FF: NEGATIVE CIRCLED DIGIT ZERO
3320                    "0", // U+FF10: FULLWIDTH DIGIT ZERO
3321                ],
3322                "0",
3323            ),
3324            (
3325                &[
3326                    "¹",  // U+00B9: SUPERSCRIPT ONE
3327                    "₁", // U+2081: SUBSCRIPT ONE
3328                    "①", // U+2460: CIRCLED DIGIT ONE
3329                    "⓵", // U+24F5: DOUBLE CIRCLED DIGIT ONE
3330                    "❶", // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE
3331                    "➀", // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE
3332                    "➊", // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
3333                    "1", // U+FF11: FULLWIDTH DIGIT ONE
3334                ],
3335                "1",
3336            ),
3337            (
3338                &[
3339                    "⒈", // U+2488: DIGIT ONE FULL STOP
3340                ],
3341                "1.",
3342            ),
3343            (
3344                &[
3345                    "⑴", // U+2474: PARENTHESIZED DIGIT ONE
3346                ],
3347                "(1)",
3348            ),
3349            (
3350                &[
3351                    "²",  // U+00B2: SUPERSCRIPT TWO
3352                    "₂", // U+2082: SUBSCRIPT TWO
3353                    "②", // U+2461: CIRCLED DIGIT TWO
3354                    "⓶", // U+24F6: DOUBLE CIRCLED DIGIT TWO
3355                    "❷", // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO
3356                    "➁", // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO
3357                    "➋", // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
3358                    "2", // U+FF12: FULLWIDTH DIGIT TWO
3359                ],
3360                "2",
3361            ),
3362            (
3363                &[
3364                    "⒉", // U+2489: DIGIT TWO FULL STOP
3365                ],
3366                "2.",
3367            ),
3368            (
3369                &[
3370                    "⑵", // U+2475: PARENTHESIZED DIGIT TWO
3371                ],
3372                "(2)",
3373            ),
3374            (
3375                &[
3376                    "³",  // U+00B3: SUPERSCRIPT THREE
3377                    "₃", // U+2083: SUBSCRIPT THREE
3378                    "③", // U+2462: CIRCLED DIGIT THREE
3379                    "⓷", // U+24F7: DOUBLE CIRCLED DIGIT THREE
3380                    "❸", // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE
3381                    "➂", // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE
3382                    "➌", // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
3383                    "3", // U+FF13: FULLWIDTH DIGIT THREE
3384                ],
3385                "3",
3386            ),
3387            (
3388                &[
3389                    "⒊", // U+248A: DIGIT THREE FULL STOP
3390                ],
3391                "3.",
3392            ),
3393            (
3394                &[
3395                    "⑶", // U+2476: PARENTHESIZED DIGIT THREE
3396                ],
3397                "(3)",
3398            ),
3399            (
3400                &[
3401                    "⁴", // U+2074: SUPERSCRIPT FOUR
3402                    "₄", // U+2084: SUBSCRIPT FOUR
3403                    "④", // U+2463: CIRCLED DIGIT FOUR
3404                    "⓸", // U+24F8: DOUBLE CIRCLED DIGIT FOUR
3405                    "❹", // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR
3406                    "➃", // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
3407                    "➍", // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
3408                    "4", // U+FF14: FULLWIDTH DIGIT FOUR
3409                ],
3410                "4",
3411            ),
3412            (
3413                &[
3414                    "⒋", // U+248B: DIGIT FOUR FULL STOP
3415                ],
3416                "4.",
3417            ),
3418            (
3419                &[
3420                    "⑷", // U+2477: PARENTHESIZED DIGIT FOUR
3421                ],
3422                "(4)",
3423            ),
3424            (
3425                &[
3426                    "⁵", // U+2075: SUPERSCRIPT FIVE
3427                    "₅", // U+2085: SUBSCRIPT FIVE
3428                    "⑤", // U+2464: CIRCLED DIGIT FIVE
3429                    "⓹", // U+24F9: DOUBLE CIRCLED DIGIT FIVE
3430                    "❺", // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE
3431                    "➄", // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
3432                    "➎", // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
3433                    "5", // U+FF15: FULLWIDTH DIGIT FIVE
3434                ],
3435                "5",
3436            ),
3437            (
3438                &[
3439                    "⒌", // U+248C: DIGIT FIVE FULL STOP
3440                ],
3441                "5.",
3442            ),
3443            (
3444                &[
3445                    "⑸", // U+2478: PARENTHESIZED DIGIT FIVE
3446                ],
3447                "(5)",
3448            ),
3449            (
3450                &[
3451                    "⁶", // U+2076: SUPERSCRIPT SIX
3452                    "₆", // U+2086: SUBSCRIPT SIX
3453                    "⑥", // U+2465: CIRCLED DIGIT SIX
3454                    "⓺", // U+24FA: DOUBLE CIRCLED DIGIT SIX
3455                    "❻", // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX
3456                    "➅", // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX
3457                    "➏", // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
3458                    "6", // U+FF16: FULLWIDTH DIGIT SIX
3459                ],
3460                "6",
3461            ),
3462            (
3463                &[
3464                    "⒍", // U+248D: DIGIT SIX FULL STOP
3465                ],
3466                "6.",
3467            ),
3468            (
3469                &[
3470                    "⑹", // U+2479: PARENTHESIZED DIGIT SIX
3471                ],
3472                "(6)",
3473            ),
3474            (
3475                &[
3476                    "⁷", // U+2077: SUPERSCRIPT SEVEN
3477                    "₇", // U+2087: SUBSCRIPT SEVEN
3478                    "⑦", // U+2466: CIRCLED DIGIT SEVEN
3479                    "⓻", // U+24FB: DOUBLE CIRCLED DIGIT SEVEN
3480                    "❼", // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
3481                    "➆", // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
3482                    "➐", // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
3483                    "7", // U+FF17: FULLWIDTH DIGIT SEVEN
3484                ],
3485                "7",
3486            ),
3487            (
3488                &[
3489                    "⒎", // U+248E: DIGIT SEVEN FULL STOP
3490                ],
3491                "7.",
3492            ),
3493            (
3494                &[
3495                    "⑺", // U+247A: PARENTHESIZED DIGIT SEVEN
3496                ],
3497                "(7)",
3498            ),
3499            (
3500                &[
3501                    "⁸", // U+2078: SUPERSCRIPT EIGHT
3502                    "₈", // U+2088: SUBSCRIPT EIGHT
3503                    "⑧", // U+2467: CIRCLED DIGIT EIGHT
3504                    "⓼", // U+24FC: DOUBLE CIRCLED DIGIT EIGHT
3505                    "❽", // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
3506                    "➇", // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
3507                    "➑", // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
3508                    "8", // U+FF18: FULLWIDTH DIGIT EIGHT
3509                ],
3510                "8",
3511            ),
3512            (
3513                &[
3514                    "⒏", // U+248F: DIGIT EIGHT FULL STOP
3515                ],
3516                "8.",
3517            ),
3518            (
3519                &[
3520                    "⑻", // U+247B: PARENTHESIZED DIGIT EIGHT
3521                ],
3522                "(8)",
3523            ),
3524            (
3525                &[
3526                    "⁹", // U+2079: SUPERSCRIPT NINE
3527                    "₉", // U+2089: SUBSCRIPT NINE
3528                    "⑨", // U+2468: CIRCLED DIGIT NINE
3529                    "⓽", // U+24FD: DOUBLE CIRCLED DIGIT NINE
3530                    "❾", // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE
3531                    "➈", // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE
3532                    "➒", // U+2792: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
3533                    "9", // U+FF19: FULLWIDTH DIGIT NINE
3534                ],
3535                "9",
3536            ),
3537            (
3538                &[
3539                    "⒐", // U+2490: DIGIT NINE FULL STOP
3540                ],
3541                "9.",
3542            ),
3543            (
3544                &[
3545                    "⑼", // U+247C: PARENTHESIZED DIGIT NINE
3546                ],
3547                "(9)",
3548            ),
3549            (
3550                &[
3551                    "⑩", // U+2469: CIRCLED NUMBER TEN
3552                    "⓾", // U+24FE: DOUBLE CIRCLED NUMBER TEN
3553                    "❿", // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN
3554                    "➉", // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN
3555                    "➓", // U+2793: DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
3556                ],
3557                "10",
3558            ),
3559            (
3560                &[
3561                    "⒑", // U+2491: NUMBER TEN FULL STOP
3562                ],
3563                "10.",
3564            ),
3565            (
3566                &[
3567                    "⑽", // U+247D: PARENTHESIZED NUMBER TEN
3568                ],
3569                "(10)",
3570            ),
3571            (
3572                &[
3573                    "⑪", // U+246A: CIRCLED NUMBER ELEVEN
3574                    "⓫", // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN
3575                ],
3576                "11",
3577            ),
3578            (
3579                &[
3580                    "⒒", // U+2492: NUMBER ELEVEN FULL STOP
3581                ],
3582                "11.",
3583            ),
3584            (
3585                &[
3586                    "⑾", // U+247E: PARENTHESIZED NUMBER ELEVEN
3587                ],
3588                "(11)",
3589            ),
3590            (
3591                &[
3592                    "⑫", // U+246B: CIRCLED NUMBER TWELVE
3593                    "⓬", // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE
3594                ],
3595                "12",
3596            ),
3597            (
3598                &[
3599                    "⒓", // U+2493: NUMBER TWELVE FULL STOP
3600                ],
3601                "12.",
3602            ),
3603            (
3604                &[
3605                    "⑿", // U+247F: PARENTHESIZED NUMBER TWELVE
3606                ],
3607                "(12)",
3608            ),
3609            (
3610                &[
3611                    "⑬", // U+246C: CIRCLED NUMBER THIRTEEN
3612                    "⓭", // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN
3613                ],
3614                "13",
3615            ),
3616            (
3617                &[
3618                    "⒔", // U+2494: NUMBER THIRTEEN FULL STOP
3619                ],
3620                "13.",
3621            ),
3622            (
3623                &[
3624                    "⒀", // U+2480: PARENTHESIZED NUMBER THIRTEEN
3625                ],
3626                "(13)",
3627            ),
3628            (
3629                &[
3630                    "⑭", // U+246D: CIRCLED NUMBER FOURTEEN
3631                    "⓮", // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN
3632                ],
3633                "14",
3634            ),
3635            (
3636                &[
3637                    "⒕", // U+2495: NUMBER FOURTEEN FULL STOP
3638                ],
3639                "14.",
3640            ),
3641            (
3642                &[
3643                    "⒁", // U+2481: PARENTHESIZED NUMBER FOURTEEN
3644                ],
3645                "(14)",
3646            ),
3647            (
3648                &[
3649                    "⑮", // U+246E: CIRCLED NUMBER FIFTEEN
3650                    "⓯", // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN
3651                ],
3652                "15",
3653            ),
3654            (
3655                &[
3656                    "⒖", // U+2496: NUMBER FIFTEEN FULL STOP
3657                ],
3658                "15.",
3659            ),
3660            (
3661                &[
3662                    "⒂", // U+2482: PARENTHESIZED NUMBER FIFTEEN
3663                ],
3664                "(15)",
3665            ),
3666            (
3667                &[
3668                    "⑯", // U+246F: CIRCLED NUMBER SIXTEEN
3669                    "⓰", // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN
3670                ],
3671                "16",
3672            ),
3673            (
3674                &[
3675                    "⒗", // U+2497: NUMBER SIXTEEN FULL STOP
3676                ],
3677                "16.",
3678            ),
3679            (
3680                &[
3681                    "⒃", // U+2483: PARENTHESIZED NUMBER SIXTEEN
3682                ],
3683                "(16)",
3684            ),
3685            (
3686                &[
3687                    "⑰", // U+2470: CIRCLED NUMBER SEVENTEEN
3688                    "⓱", // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN
3689                ],
3690                "17",
3691            ),
3692            (
3693                &[
3694                    "⒘", // U+2498: NUMBER SEVENTEEN FULL STOP
3695                ],
3696                "17.",
3697            ),
3698            (
3699                &[
3700                    "⒄", // U+2484: PARENTHESIZED NUMBER SEVENTEEN
3701                ],
3702                "(17)",
3703            ),
3704            (
3705                &[
3706                    "⑱", // U+2471: CIRCLED NUMBER EIGHTEEN
3707                    "⓲", // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN
3708                ],
3709                "18",
3710            ),
3711            (
3712                &[
3713                    "⒙", // U+2499: NUMBER EIGHTEEN FULL STOP
3714                ],
3715                "18.",
3716            ),
3717            (
3718                &[
3719                    "⒅", // U+2485: PARENTHESIZED NUMBER EIGHTEEN
3720                ],
3721                "(18)",
3722            ),
3723            (
3724                &[
3725                    "⑲", // U+2472: CIRCLED NUMBER NINETEEN
3726                    "⓳", // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN
3727                ],
3728                "19",
3729            ),
3730            (
3731                &[
3732                    "⒚", // U+249A: NUMBER NINETEEN FULL STOP
3733                ],
3734                "19.",
3735            ),
3736            (
3737                &[
3738                    "⒆", // U+2486: PARENTHESIZED NUMBER NINETEEN
3739                ],
3740                "(19)",
3741            ),
3742            (
3743                &[
3744                    "⑳", // U+2473: CIRCLED NUMBER TWENTY
3745                    "⓴", // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY
3746                ],
3747                "20",
3748            ),
3749            (
3750                &[
3751                    "⒛", // U+249B: NUMBER TWENTY FULL STOP
3752                ],
3753                "20.",
3754            ),
3755            (
3756                &[
3757                    "⒇", // U+2487: PARENTHESIZED NUMBER TWENTY
3758                ],
3759                "(20)",
3760            ),
3761            (
3762                &[
3763                    "«",  // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
3764                    "»",  // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
3765                    "“", // U+201C: LEFT DOUBLE QUOTATION MARK
3766                    "”", // U+201D: RIGHT DOUBLE QUOTATION MARK
3767                    "„", // U+201E: DOUBLE LOW-9 QUOTATION MARK
3768                    "″", // U+2033: DOUBLE PRIME
3769                    "‶", // U+2036: REVERSED DOUBLE PRIME
3770                    "❝", // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
3771                    "❞", // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
3772                    "❮", // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
3773                    "❯", // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
3774                    """, // U+FF02: FULLWIDTH QUOTATION MARK
3775                ],
3776                "\"",
3777            ),
3778            (
3779                &[
3780                    "‘", // U+2018: LEFT SINGLE QUOTATION MARK
3781                    "’", // U+2019: RIGHT SINGLE QUOTATION MARK
3782                    "‚", // U+201A: SINGLE LOW-9 QUOTATION MARK
3783                    "‛", // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK
3784                    "′", // U+2032: PRIME
3785                    "‵", // U+2035: REVERSED PRIME
3786                    "‹", // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
3787                    "›", // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
3788                    "❛", // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
3789                    "❜", // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
3790                    "'", // U+FF07: FULLWIDTH APOSTROPHE
3791                ],
3792                "'",
3793            ),
3794            (
3795                &[
3796                    "‐", // U+2010: HYPHEN
3797                    "‑", // U+2011: NON-BREAKING HYPHEN
3798                    "‒", // U+2012: FIGURE DASH
3799                    "–", // U+2013: EN DASH
3800                    "—", // U+2014: EM DASH
3801                    "⁻", // U+207B: SUPERSCRIPT MINUS
3802                    "₋", // U+208B: SUBSCRIPT MINUS
3803                    "-", // U+FF0D: FULLWIDTH HYPHEN-MINUS
3804                ],
3805                "-",
3806            ),
3807            (
3808                &[
3809                    "⁅", // U+2045: LEFT SQUARE BRACKET WITH QUILL
3810                    "❲", // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
3811                    "[", // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET
3812                ],
3813                "[",
3814            ),
3815            (
3816                &[
3817                    "⁆", // U+2046: RIGHT SQUARE BRACKET WITH QUILL
3818                    "❳", // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
3819                    "]", // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET
3820                ],
3821                "]",
3822            ),
3823            (
3824                &[
3825                    "⁽", // U+207D: SUPERSCRIPT LEFT PARENTHESIS
3826                    "₍", // U+208D: SUBSCRIPT LEFT PARENTHESIS
3827                    "❨", // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT
3828                    "❪", // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
3829                    "(", // U+FF08: FULLWIDTH LEFT PARENTHESIS
3830                ],
3831                "(",
3832            ),
3833            (
3834                &[
3835                    "⸨", // U+2E28: LEFT DOUBLE PARENTHESIS
3836                ],
3837                "((",
3838            ),
3839            (
3840                &[
3841                    "⁾", // U+207E: SUPERSCRIPT RIGHT PARENTHESIS
3842                    "₎", // U+208E: SUBSCRIPT RIGHT PARENTHESIS
3843                    "❩", // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT
3844                    "❫", // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
3845                    ")", // U+FF09: FULLWIDTH RIGHT PARENTHESIS
3846                ],
3847                ")",
3848            ),
3849            (
3850                &[
3851                    "⸩", // U+2E29: RIGHT DOUBLE PARENTHESIS
3852                ],
3853                "))",
3854            ),
3855            (
3856                &[
3857                    "❬", // U+276C: MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
3858                    "❰", // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
3859                    "<", // U+FF1C: FULLWIDTH LESS-THAN SIGN
3860                ],
3861                "<",
3862            ),
3863            (
3864                &[
3865                    "❭", // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
3866                    "❱", // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
3867                    ">", // U+FF1E: FULLWIDTH GREATER-THAN SIGN
3868                ],
3869                ">",
3870            ),
3871            (
3872                &[
3873                    "❴", // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT
3874                    "{", // U+FF5B: FULLWIDTH LEFT CURLY BRACKET
3875                ],
3876                "{",
3877            ),
3878            (
3879                &[
3880                    "❵", // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT
3881                    "}", // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET
3882                ],
3883                "}",
3884            ),
3885            (
3886                &[
3887                    "⁺", // U+207A: SUPERSCRIPT PLUS SIGN
3888                    "₊", // U+208A: SUBSCRIPT PLUS SIGN
3889                    "+", // U+FF0B: FULLWIDTH PLUS SIGN
3890                ],
3891                "+",
3892            ),
3893            (
3894                &[
3895                    "⁼", // U+207C: SUPERSCRIPT EQUALS SIGN
3896                    "₌", // U+208C: SUBSCRIPT EQUALS SIGN
3897                    "=", // U+FF1D: FULLWIDTH EQUALS SIGN
3898                ],
3899                "=",
3900            ),
3901            (
3902                &[
3903                    "!", // U+FF01: FULLWIDTH EXCLAMATION MARK
3904                ],
3905                "!",
3906            ),
3907            (
3908                &[
3909                    "‼", // U+203C: DOUBLE EXCLAMATION MARK
3910                ],
3911                "!!",
3912            ),
3913            (
3914                &[
3915                    "⁉", // U+2049: EXCLAMATION QUESTION MARK
3916                ],
3917                "!?",
3918            ),
3919            (
3920                &[
3921                    "#", // U+FF03: FULLWIDTH NUMBER SIGN
3922                ],
3923                "#",
3924            ),
3925            (
3926                &[
3927                    "$", // U+FF04: FULLWIDTH DOLLAR SIGN
3928                ],
3929                "$",
3930            ),
3931            (
3932                &[
3933                    "⁒", // U+2052: COMMERCIAL MINUS SIGN
3934                    "%", // U+FF05: FULLWIDTH PERCENT SIGN
3935                ],
3936                "%",
3937            ),
3938            (
3939                &[
3940                    "&", // U+FF06: FULLWIDTH AMPERSAND
3941                ],
3942                "&",
3943            ),
3944            (
3945                &[
3946                    "⁎", // U+204E: LOW ASTERISK
3947                    "*", // U+FF0A: FULLWIDTH ASTERISK
3948                ],
3949                "*",
3950            ),
3951            (
3952                &[
3953                    ",", // U+FF0C: FULLWIDTH COMMA
3954                ],
3955                ",",
3956            ),
3957            (
3958                &[
3959                    ".", // U+FF0E: FULLWIDTH FULL STOP
3960                ],
3961                ".",
3962            ),
3963            (
3964                &[
3965                    "⁄", // U+2044: FRACTION SLASH
3966                    "/", // U+FF0F: FULLWIDTH SOLIDUS
3967                ],
3968                "/",
3969            ),
3970            (
3971                &[
3972                    ":", // U+FF1A: FULLWIDTH COLON
3973                ],
3974                ":",
3975            ),
3976            (
3977                &[
3978                    "⁏", // U+204F: REVERSED SEMICOLON
3979                    ";", // U+FF1B: FULLWIDTH SEMICOLON
3980                ],
3981                ";",
3982            ),
3983            (
3984                &[
3985                    "?", // U+FF1F: FULLWIDTH QUESTION MARK
3986                ],
3987                "?",
3988            ),
3989            (
3990                &[
3991                    "⁇", // U+2047: DOUBLE QUESTION MARK
3992                ],
3993                "??",
3994            ),
3995            (
3996                &[
3997                    "⁈", // U+2048: QUESTION EXCLAMATION MARK
3998                ],
3999                "?!",
4000            ),
4001            (
4002                &[
4003                    "@", // U+FF20: FULLWIDTH COMMERCIAL AT
4004                ],
4005                "@",
4006            ),
4007            (
4008                &[
4009                    "\", // U+FF3C: FULLWIDTH REVERSE SOLIDUS
4010                ],
4011                "\\",
4012            ),
4013            (
4014                &[
4015                    "‸", // U+2038: CARET
4016                    "^", // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT
4017                ],
4018                "^",
4019            ),
4020            (
4021                &[
4022                    "_", // U+FF3F: FULLWIDTH LOW LINE
4023                ],
4024                "_",
4025            ),
4026            (
4027                &[
4028                    "⁓", // U+2053: SWUNG DASH
4029                    "~", // U+FF5E: FULLWIDTH TILDE
4030                ],
4031                "~",
4032            ),
4033        ];
4034
4035        for (characters, folded) in foldings {
4036            for &c in characters {
4037                assert_eq!(
4038                    folding_using_raw_tokenizer_helper(c),
4039                    folded,
4040                    "testing that character \"{}\" becomes \"{}\"",
4041                    c,
4042                    folded
4043                );
4044            }
4045        }
4046    }
4047}