summavy/tokenizer/ascii_folding_filter.rs
1use std::mem;
2
3use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
4
/// Token filter that folds non-ASCII characters to ASCII.
///
/// Alphabetic, numeric, and symbolic Unicode characters lying outside the
/// "Basic Latin" block (the first 127 ASCII characters) are replaced by
/// their ASCII equivalents whenever such an equivalent exists.
#[derive(Clone)]
pub struct AsciiFoldingFilter;
10
11impl TokenFilter for AsciiFoldingFilter {
12 fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
13 From::from(AsciiFoldingFilterTokenStream {
14 tail: token_stream,
15 buffer: String::with_capacity(100),
16 })
17 }
18}
19
20pub struct AsciiFoldingFilterTokenStream<'a> {
21 buffer: String,
22 tail: BoxTokenStream<'a>,
23}
24
25impl<'a> TokenStream for AsciiFoldingFilterTokenStream<'a> {
26 fn advance(&mut self) -> bool {
27 if !self.tail.advance() {
28 return false;
29 }
30 if !self.token_mut().text.is_ascii() {
31 // ignore its already ascii
32 to_ascii(&self.tail.token().text, &mut self.buffer);
33 mem::swap(&mut self.tail.token_mut().text, &mut self.buffer);
34 }
35 true
36 }
37
38 fn token(&self) -> &Token {
39 self.tail.token()
40 }
41
42 fn token_mut(&mut self) -> &mut Token {
43 self.tail.token_mut()
44 }
45}
46
// Returns a string that represents the ASCII-folded version of
// the character. If the `char` does not require ASCII folding
// (e.g. simple ASCII chars like `A`) or if the `char`
// does not have a sensible ASCII equivalent (e.g. Kanji like 馬),
// this function returns `None`.
52fn fold_non_ascii_char(c: char) -> Option<&'static str> {
53 match c {
54 '\u{00C0}' | // À [LATIN CAPITAL LETTER A WITH GRAVE]
55 '\u{00C1}' | // Á [LATIN CAPITAL LETTER A WITH ACUTE]
56 '\u{00C2}' | // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
57 '\u{00C3}' | // Ã [LATIN CAPITAL LETTER A WITH TILDE]
58 '\u{00C4}' | // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
59 '\u{00C5}' | // Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
60 '\u{0100}' | // Ā [LATIN CAPITAL LETTER A WITH MACRON]
61 '\u{0102}' | // Ă [LATIN CAPITAL LETTER A WITH BREVE]
62 '\u{0104}' | // Ą [LATIN CAPITAL LETTER A WITH OGONEK]
63 '\u{018F}' | // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
64 '\u{01CD}' | // Ǎ [LATIN CAPITAL LETTER A WITH CARON]
65 '\u{01DE}' | // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
66 '\u{01E0}' | // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
67 '\u{01FA}' | // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
68 '\u{0200}' | // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
69 '\u{0202}' | // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
70 '\u{0226}' | // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
71 '\u{023A}' | // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
72 '\u{1D00}' | // ᴀ [LATIN LETTER SMALL CAPITAL A]
73 '\u{1E00}' | // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
74 '\u{1EA0}' | // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
75 '\u{1EA2}' | // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
76 '\u{1EA4}' | // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
77 '\u{1EA6}' | // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
78 '\u{1EA8}' | // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
79 '\u{1EAA}' | // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
80 '\u{1EAC}' | // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
81 '\u{1EAE}' | // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
82 '\u{1EB0}' | // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
83 '\u{1EB2}' | // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
84 '\u{1EB4}' | // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
85 '\u{1EB6}' | // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
86 '\u{24B6}' | // Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
87 '\u{FF21}' // A [FULLWIDTH LATIN CAPITAL LETTER A]
88 => Some("A"),
89 '\u{00E0}' | // à [LATIN SMALL LETTER A WITH GRAVE]
90 '\u{00E1}' | // á [LATIN SMALL LETTER A WITH ACUTE]
91 '\u{00E2}' | // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
92 '\u{00E3}' | // ã [LATIN SMALL LETTER A WITH TILDE]
93 '\u{00E4}' | // ä [LATIN SMALL LETTER A WITH DIAERESIS]
94 '\u{00E5}' | // å [LATIN SMALL LETTER A WITH RING ABOVE]
95 '\u{0101}' | // ā [LATIN SMALL LETTER A WITH MACRON]
96 '\u{0103}' | // ă [LATIN SMALL LETTER A WITH BREVE]
97 '\u{0105}' | // ą [LATIN SMALL LETTER A WITH OGONEK]
98 '\u{01CE}' | // ǎ [LATIN SMALL LETTER A WITH CARON]
99 '\u{01DF}' | // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
100 '\u{01E1}' | // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
101 '\u{01FB}' | // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
102 '\u{0201}' | // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
103 '\u{0203}' | // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
104 '\u{0227}' | // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
105 '\u{0250}' | // ɐ [LATIN SMALL LETTER TURNED A]
106 '\u{0259}' | // ə [LATIN SMALL LETTER SCHWA]
107 '\u{025A}' | // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
108 '\u{1D8F}' | // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
109 '\u{1D95}' | // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
110 '\u{1E01}' | // ạ [LATIN SMALL LETTER A WITH RING BELOW]
111 '\u{1E9A}' | // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
112 '\u{1EA1}' | // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
113 '\u{1EA3}' | // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
114 '\u{1EA5}' | // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
115 '\u{1EA7}' | // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
116 '\u{1EA9}' | // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
117 '\u{1EAB}' | // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
118 '\u{1EAD}' | // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
119 '\u{1EAF}' | // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
120 '\u{1EB1}' | // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
121 '\u{1EB3}' | // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
122 '\u{1EB5}' | // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
123 '\u{1EB7}' | // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
124 '\u{2090}' | // ₐ [LATIN SUBSCRIPT SMALL LETTER A]
125 '\u{2094}' | // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
126 '\u{24D0}' | // ⓐ [CIRCLED LATIN SMALL LETTER A]
127 '\u{2C65}' | // ⱥ [LATIN SMALL LETTER A WITH STROKE]
128 '\u{2C6F}' | // Ɐ [LATIN CAPITAL LETTER TURNED A]
129 '\u{FF41}' // a [FULLWIDTH LATIN SMALL LETTER A]
130 => Some("a"),
131 '\u{A732}' // Ꜳ [LATIN CAPITAL LETTER AA]
132 => Some("AA"),
133 '\u{00C6}' | // Æ [LATIN CAPITAL LETTER AE]
134 '\u{01E2}' | // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
135 '\u{01FC}' | // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
136 '\u{1D01}' // ᴁ [LATIN LETTER SMALL CAPITAL AE]
137 => Some("AE"),
138 '\u{A734}' // Ꜵ [LATIN CAPITAL LETTER AO]
139 => Some("AO"),
140 '\u{A736}' // Ꜷ [LATIN CAPITAL LETTER AU]
141 => Some("AU"),
142 '\u{A738}' | // Ꜹ [LATIN CAPITAL LETTER AV]
143 '\u{A73A}' // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
144 => Some("AV"),
145 '\u{A73C}' // Ꜽ [LATIN CAPITAL LETTER AY]
146 => Some("AY"),
147 '\u{249C}' // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
148 => Some("(a)"),
149 '\u{A733}' // ꜳ [LATIN SMALL LETTER AA]
150 => Some("aa"),
151 '\u{00E6}' | // æ [LATIN SMALL LETTER AE]
152 '\u{01E3}' | // ǣ [LATIN SMALL LETTER AE WITH MACRON]
153 '\u{01FD}' | // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
154 '\u{1D02}' // ᴂ [LATIN SMALL LETTER TURNED AE]
155 => Some("ae"),
156 '\u{A735}' // ꜵ [LATIN SMALL LETTER AO]
157 => Some("ao"),
158 '\u{A737}' // ꜷ [LATIN SMALL LETTER AU]
159 => Some("au"),
160 '\u{A739}' | // ꜹ [LATIN SMALL LETTER AV]
161 '\u{A73B}' // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
162 => Some("av"),
163 '\u{A73D}' // ꜽ [LATIN SMALL LETTER AY]
164 => Some("ay"),
165 '\u{0181}' | // Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
166 '\u{0182}' | // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
167 '\u{0243}' | // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
168 '\u{0299}' | // ʙ [LATIN LETTER SMALL CAPITAL B]
169 '\u{1D03}' | // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
170 '\u{1E02}' | // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
171 '\u{1E04}' | // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
172 '\u{1E06}' | // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
173 '\u{24B7}' | // Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
174 '\u{FF22}' // B [FULLWIDTH LATIN CAPITAL LETTER B]
175 => Some("B"),
176 '\u{0180}' | // ƀ [LATIN SMALL LETTER B WITH STROKE]
177 '\u{0183}' | // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
178 '\u{0253}' | // ɓ [LATIN SMALL LETTER B WITH HOOK]
179 '\u{1D6C}' | // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
180 '\u{1D80}' | // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
181 '\u{1E03}' | // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
182 '\u{1E05}' | // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
183 '\u{1E07}' | // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
184 '\u{24D1}' | // ⓑ [CIRCLED LATIN SMALL LETTER B]
185 '\u{FF42}' // b [FULLWIDTH LATIN SMALL LETTER B]
186 => Some("b"),
187 '\u{249D}' // ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
188 => Some("(b)"),
189 '\u{00C7}' | // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
190 '\u{0106}' | // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
191 '\u{0108}' | // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
192 '\u{010A}' | // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
193 '\u{010C}' | // Č [LATIN CAPITAL LETTER C WITH CARON]
194 '\u{0187}' | // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
195 '\u{023B}' | // Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
196 '\u{0297}' | // ʗ [LATIN LETTER STRETCHED C]
197 '\u{1D04}' | // ᴄ [LATIN LETTER SMALL CAPITAL C]
198 '\u{1E08}' | // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
199 '\u{24B8}' | // Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
200 '\u{FF23}' // C [FULLWIDTH LATIN CAPITAL LETTER C]
201 => Some("C"),
202 '\u{00E7}' | // ç [LATIN SMALL LETTER C WITH CEDILLA]
203 '\u{0107}' | // ć [LATIN SMALL LETTER C WITH ACUTE]
204 '\u{0109}' | // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
205 '\u{010B}' | // ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
206 '\u{010D}' | // č [LATIN SMALL LETTER C WITH CARON]
207 '\u{0188}' | // ƈ [LATIN SMALL LETTER C WITH HOOK]
208 '\u{023C}' | // ȼ [LATIN SMALL LETTER C WITH STROKE]
209 '\u{0255}' | // ɕ [LATIN SMALL LETTER C WITH CURL]
210 '\u{1E09}' | // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
211 '\u{2184}' | // ↄ [LATIN SMALL LETTER REVERSED C]
212 '\u{24D2}' | // ⓒ [CIRCLED LATIN SMALL LETTER C]
213 '\u{A73E}' | // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
214 '\u{A73F}' | // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
215 '\u{FF43}' // c [FULLWIDTH LATIN SMALL LETTER C]
216 => Some("c"),
217 '\u{249E}' // ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
218 => Some("(c)"),
219 '\u{00D0}' | // Ð [LATIN CAPITAL LETTER ETH]
220 '\u{010E}' | // Ď [LATIN CAPITAL LETTER D WITH CARON]
221 '\u{0110}' | // Đ [LATIN CAPITAL LETTER D WITH STROKE]
222 '\u{0189}' | // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
223 '\u{018A}' | // Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
224 '\u{018B}' | // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
225 '\u{1D05}' | // ᴅ [LATIN LETTER SMALL CAPITAL D]
226 '\u{1D06}' | // ᴆ [LATIN LETTER SMALL CAPITAL ETH]
227 '\u{1E0A}' | // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
228 '\u{1E0C}' | // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
229 '\u{1E0E}' | // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
230 '\u{1E10}' | // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
231 '\u{1E12}' | // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
232 '\u{24B9}' | // Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
233 '\u{A779}' | // Ꝺ [LATIN CAPITAL LETTER INSULAR D]
234 '\u{FF24}' // D [FULLWIDTH LATIN CAPITAL LETTER D]
235 => Some("D"),
236 '\u{00F0}' | // ð [LATIN SMALL LETTER ETH]
237 '\u{010F}' | // ď [LATIN SMALL LETTER D WITH CARON]
238 '\u{0111}' | // đ [LATIN SMALL LETTER D WITH STROKE]
239 '\u{018C}' | // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
240 '\u{0221}' | // ȡ [LATIN SMALL LETTER D WITH CURL]
241 '\u{0256}' | // ɖ [LATIN SMALL LETTER D WITH TAIL]
242 '\u{0257}' | // ɗ [LATIN SMALL LETTER D WITH HOOK]
243 '\u{1D6D}' | // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
244 '\u{1D81}' | // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
245 '\u{1D91}' | // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
246 '\u{1E0B}' | // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
247 '\u{1E0D}' | // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
248 '\u{1E0F}' | // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
249 '\u{1E11}' | // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
250 '\u{1E13}' | // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
251 '\u{24D3}' | // ⓓ [CIRCLED LATIN SMALL LETTER D]
252 '\u{A77A}' | // ꝺ [LATIN SMALL LETTER INSULAR D]
253 '\u{FF44}' // d [FULLWIDTH LATIN SMALL LETTER D]
254 => Some("d"),
255 '\u{01C4}' | // DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
256 '\u{01F1}' // DZ [LATIN CAPITAL LETTER DZ]
257 => Some("DZ"),
258 '\u{01C5}' | // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
259 '\u{01F2}' // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
260 => Some("Dz"),
261 '\u{249F}' // ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
262 => Some("(d)"),
263 '\u{0238}' // ȸ [LATIN SMALL LETTER DB DIGRAPH]
264 => Some("db"),
265 '\u{01C6}' | // dž [LATIN SMALL LETTER DZ WITH CARON]
266 '\u{01F3}' | // dz [LATIN SMALL LETTER DZ]
267 '\u{02A3}' | // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
268 '\u{02A5}' // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
269 => Some("dz"),
270 '\u{00C8}' | // È [LATIN CAPITAL LETTER E WITH GRAVE]
271 '\u{00C9}' | // É [LATIN CAPITAL LETTER E WITH ACUTE]
272 '\u{00CA}' | // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
273 '\u{00CB}' | // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
274 '\u{0112}' | // Ē [LATIN CAPITAL LETTER E WITH MACRON]
275 '\u{0114}' | // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
276 '\u{0116}' | // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
277 '\u{0118}' | // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
278 '\u{011A}' | // Ě [LATIN CAPITAL LETTER E WITH CARON]
279 '\u{018E}' | // Ǝ [LATIN CAPITAL LETTER REVERSED E]
280 '\u{0190}' | // Ɛ [LATIN CAPITAL LETTER OPEN E]
281 '\u{0204}' | // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
282 '\u{0206}' | // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
283 '\u{0228}' | // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
284 '\u{0246}' | // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
285 '\u{1D07}' | // ᴇ [LATIN LETTER SMALL CAPITAL E]
286 '\u{1E14}' | // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
287 '\u{1E16}' | // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
288 '\u{1E18}' | // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
289 '\u{1E1A}' | // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
290 '\u{1E1C}' | // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
291 '\u{1EB8}' | // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
292 '\u{1EBA}' | // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
293 '\u{1EBC}' | // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
294 '\u{1EBE}' | // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
295 '\u{1EC0}' | // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
296 '\u{1EC2}' | // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
297 '\u{1EC4}' | // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
298 '\u{1EC6}' | // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
299 '\u{24BA}' | // Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
300 '\u{2C7B}' | // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
301 '\u{FF25}' // E [FULLWIDTH LATIN CAPITAL LETTER E]
302 => Some("E"),
303 '\u{00E8}' | // è [LATIN SMALL LETTER E WITH GRAVE]
304 '\u{00E9}' | // é [LATIN SMALL LETTER E WITH ACUTE]
305 '\u{00EA}' | // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
306 '\u{00EB}' | // ë [LATIN SMALL LETTER E WITH DIAERESIS]
307 '\u{0113}' | // ē [LATIN SMALL LETTER E WITH MACRON]
308 '\u{0115}' | // ĕ [LATIN SMALL LETTER E WITH BREVE]
309 '\u{0117}' | // ė [LATIN SMALL LETTER E WITH DOT ABOVE]
310 '\u{0119}' | // ę [LATIN SMALL LETTER E WITH OGONEK]
311 '\u{011B}' | // ě [LATIN SMALL LETTER E WITH CARON]
312 '\u{01DD}' | // ǝ [LATIN SMALL LETTER TURNED E]
313 '\u{0205}' | // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
314 '\u{0207}' | // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
315 '\u{0229}' | // ȩ [LATIN SMALL LETTER E WITH CEDILLA]
316 '\u{0247}' | // ɇ [LATIN SMALL LETTER E WITH STROKE]
317 '\u{0258}' | // ɘ [LATIN SMALL LETTER REVERSED E]
318 '\u{025B}' | // ɛ [LATIN SMALL LETTER OPEN E]
319 '\u{025C}' | // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
320 '\u{025D}' | // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
321 '\u{025E}' | // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
322 '\u{029A}' | // ʚ [LATIN SMALL LETTER CLOSED OPEN E]
323 '\u{1D08}' | // ᴈ [LATIN SMALL LETTER TURNED OPEN E]
324 '\u{1D92}' | // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
325 '\u{1D93}' | // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
326 '\u{1D94}' | // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
327 '\u{1E15}' | // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
328 '\u{1E17}' | // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
329 '\u{1E19}' | // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
330 '\u{1E1B}' | // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
331 '\u{1E1D}' | // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
332 '\u{1EB9}' | // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
333 '\u{1EBB}' | // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
334 '\u{1EBD}' | // ẽ [LATIN SMALL LETTER E WITH TILDE]
335 '\u{1EBF}' | // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
336 '\u{1EC1}' | // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
337 '\u{1EC3}' | // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
338 '\u{1EC5}' | // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
339 '\u{1EC7}' | // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
340 '\u{2091}' | // ₑ [LATIN SUBSCRIPT SMALL LETTER E]
341 '\u{24D4}' | // ⓔ [CIRCLED LATIN SMALL LETTER E]
342 '\u{2C78}' | // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
343 '\u{FF45}' // e [FULLWIDTH LATIN SMALL LETTER E]
344 => Some("e"),
345 '\u{24A0}' // ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
346 => Some("(e)"),
347 '\u{0191}' | // Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
348 '\u{1E1E}' | // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
349 '\u{24BB}' | // Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
350 '\u{A730}' | // ꜰ [LATIN LETTER SMALL CAPITAL F]
351 '\u{A77B}' | // Ꝼ [LATIN CAPITAL LETTER INSULAR F]
352 '\u{A7FB}' | // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
353 '\u{FF26}' // F [FULLWIDTH LATIN CAPITAL LETTER F]
354 => Some("F"),
355 '\u{0192}' | // ƒ [LATIN SMALL LETTER F WITH HOOK]
356 '\u{1D6E}' | // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
357 '\u{1D82}' | // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
358 '\u{1E1F}' | // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
359 '\u{1E9B}' | // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
360 '\u{24D5}' | // ⓕ [CIRCLED LATIN SMALL LETTER F]
361 '\u{A77C}' | // ꝼ [LATIN SMALL LETTER INSULAR F]
362 '\u{FF46}' // f [FULLWIDTH LATIN SMALL LETTER F]
363 => Some("f"),
364 '\u{24A1}' // ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
365 => Some("(f)"),
366 '\u{FB00}' // ff [LATIN SMALL LIGATURE FF]
367 => Some("ff"),
368 '\u{FB03}' // ffi [LATIN SMALL LIGATURE FFI]
369 => Some("ffi"),
370 '\u{FB04}' // ffl [LATIN SMALL LIGATURE FFL]
371 => Some("ffl"),
372 '\u{FB01}' // fi [LATIN SMALL LIGATURE FI]
373 => Some("fi"),
374 '\u{FB02}' // fl [LATIN SMALL LIGATURE FL]
375 => Some("fl"),
376 '\u{011C}' | // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
377 '\u{011E}' | // Ğ [LATIN CAPITAL LETTER G WITH BREVE]
378 '\u{0120}' | // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
379 '\u{0122}' | // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
380 '\u{0193}' | // Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
381 '\u{01E4}' | // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
382 '\u{01E5}' | // ǥ [LATIN SMALL LETTER G WITH STROKE]
383 '\u{01E6}' | // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
384 '\u{01E7}' | // ǧ [LATIN SMALL LETTER G WITH CARON]
385 '\u{01F4}' | // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
386 '\u{0262}' | // ɢ [LATIN LETTER SMALL CAPITAL G]
387 '\u{029B}' | // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
388 '\u{1E20}' | // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
389 '\u{24BC}' | // Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
390 '\u{A77D}' | // Ᵹ [LATIN CAPITAL LETTER INSULAR G]
391 '\u{A77E}' | // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
392 '\u{FF27}' // G [FULLWIDTH LATIN CAPITAL LETTER G]
393 => Some("G"),
394 '\u{011D}' | // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
395 '\u{011F}' | // ğ [LATIN SMALL LETTER G WITH BREVE]
396 '\u{0121}' | // ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
397 '\u{0123}' | // ģ [LATIN SMALL LETTER G WITH CEDILLA]
398 '\u{01F5}' | // ǵ [LATIN SMALL LETTER G WITH ACUTE]
399 '\u{0260}' | // ɠ [LATIN SMALL LETTER G WITH HOOK]
400 '\u{0261}' | // ɡ [LATIN SMALL LETTER SCRIPT G]
401 '\u{1D77}' | // ᵷ [LATIN SMALL LETTER TURNED G]
402 '\u{1D79}' | // ᵹ [LATIN SMALL LETTER INSULAR G]
403 '\u{1D83}' | // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
404 '\u{1E21}' | // ḡ [LATIN SMALL LETTER G WITH MACRON]
405 '\u{24D6}' | // ⓖ [CIRCLED LATIN SMALL LETTER G]
406 '\u{A77F}' | // ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
407 '\u{FF47}' // g [FULLWIDTH LATIN SMALL LETTER G]
408 => Some("g"),
409 '\u{24A2}' // ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
410 => Some("(g)"),
411 '\u{0124}' | // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
412 '\u{0126}' | // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
413 '\u{021E}' | // Ȟ [LATIN CAPITAL LETTER H WITH CARON]
414 '\u{029C}' | // ʜ [LATIN LETTER SMALL CAPITAL H]
415 '\u{1E22}' | // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
416 '\u{1E24}' | // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
417 '\u{1E26}' | // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
418 '\u{1E28}' | // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
419 '\u{1E2A}' | // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
420 '\u{24BD}' | // Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
421 '\u{2C67}' | // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
422 '\u{2C75}' | // Ⱶ [LATIN CAPITAL LETTER HALF H]
423 '\u{FF28}' // H [FULLWIDTH LATIN CAPITAL LETTER H]
424 => Some("H"),
425 '\u{0125}' | // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
426 '\u{0127}' | // ħ [LATIN SMALL LETTER H WITH STROKE]
427 '\u{021F}' | // ȟ [LATIN SMALL LETTER H WITH CARON]
428 '\u{0265}' | // ɥ [LATIN SMALL LETTER TURNED H]
429 '\u{0266}' | // ɦ [LATIN SMALL LETTER H WITH HOOK]
430 '\u{02AE}' | // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
431 '\u{02AF}' | // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
432 '\u{1E23}' | // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
433 '\u{1E25}' | // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
434 '\u{1E27}' | // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
435 '\u{1E29}' | // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
436 '\u{1E2B}' | // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
437 '\u{1E96}' | // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
438 '\u{24D7}' | // ⓗ [CIRCLED LATIN SMALL LETTER H]
439 '\u{2C68}' | // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
440 '\u{2C76}' | // ⱶ [LATIN SMALL LETTER HALF H]
441 '\u{FF48}' // h [FULLWIDTH LATIN SMALL LETTER H]
442 => Some("h"),
443 '\u{01F6}' // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
444 => Some("HV"),
445 '\u{24A3}' // ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
446 => Some("(h)"),
447 '\u{0195}' // ƕ [LATIN SMALL LETTER HV]
448 => Some("hv"),
449 '\u{00CC}' | // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
450 '\u{00CD}' | // Í [LATIN CAPITAL LETTER I WITH ACUTE]
451 '\u{00CE}' | // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
452 '\u{00CF}' | // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
453 '\u{0128}' | // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
454 '\u{012A}' | // Ī [LATIN CAPITAL LETTER I WITH MACRON]
455 '\u{012C}' | // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
456 '\u{012E}' | // Į [LATIN CAPITAL LETTER I WITH OGONEK]
457 '\u{0130}' | // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
458 '\u{0196}' | // Ɩ [LATIN CAPITAL LETTER IOTA]
459 '\u{0197}' | // Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
460 '\u{01CF}' | // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
461 '\u{0208}' | // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
462 '\u{020A}' | // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
463 '\u{026A}' | // ɪ [LATIN LETTER SMALL CAPITAL I]
464 '\u{1D7B}' | // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
465 '\u{1E2C}' | // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
466 '\u{1E2E}' | // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
467 '\u{1EC8}' | // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
468 '\u{1ECA}' | // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
469 '\u{24BE}' | // Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
470 '\u{A7FE}' | // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
471 '\u{FF29}' // I [FULLWIDTH LATIN CAPITAL LETTER I]
472 => Some("I"),
473 '\u{00EC}' | // ì [LATIN SMALL LETTER I WITH GRAVE]
474 '\u{00ED}' | // í [LATIN SMALL LETTER I WITH ACUTE]
475 '\u{00EE}' | // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
476 '\u{00EF}' | // ï [LATIN SMALL LETTER I WITH DIAERESIS]
477 '\u{0129}' | // ĩ [LATIN SMALL LETTER I WITH TILDE]
478 '\u{012B}' | // ī [LATIN SMALL LETTER I WITH MACRON]
479 '\u{012D}' | // ĭ [LATIN SMALL LETTER I WITH BREVE]
480 '\u{012F}' | // į [LATIN SMALL LETTER I WITH OGONEK]
481 '\u{0131}' | // ı [LATIN SMALL LETTER DOTLESS I]
482 '\u{01D0}' | // ǐ [LATIN SMALL LETTER I WITH CARON]
483 '\u{0209}' | // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
484 '\u{020B}' | // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
485 '\u{0268}' | // ɨ [LATIN SMALL LETTER I WITH STROKE]
486 '\u{1D09}' | // ᴉ [LATIN SMALL LETTER TURNED I]
487 '\u{1D62}' | // ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
488 '\u{1D7C}' | // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
489 '\u{1D96}' | // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
490 '\u{1E2D}' | // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
491 '\u{1E2F}' | // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
492 '\u{1EC9}' | // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
493 '\u{1ECB}' | // ị [LATIN SMALL LETTER I WITH DOT BELOW]
494 '\u{2071}' | // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
495 '\u{24D8}' | // ⓘ [CIRCLED LATIN SMALL LETTER I]
496 '\u{FF49}' // i [FULLWIDTH LATIN SMALL LETTER I]
497 => Some("i"),
498 '\u{0132}' // IJ [LATIN CAPITAL LIGATURE IJ]
499 => Some("IJ"),
500 '\u{24A4}' // ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
501 => Some("(i)"),
502 '\u{0133}' // ij [LATIN SMALL LIGATURE IJ]
503 => Some("ij"),
504 '\u{0134}' | // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
505 '\u{0248}' | // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
506 '\u{1D0A}' | // ᴊ [LATIN LETTER SMALL CAPITAL J]
507 '\u{24BF}' | // Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
508 '\u{FF2A}' // J [FULLWIDTH LATIN CAPITAL LETTER J]
509 => Some("J"),
510 '\u{0135}' | // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
511 '\u{01F0}' | // ǰ [LATIN SMALL LETTER J WITH CARON]
512 '\u{0237}' | // ȷ [LATIN SMALL LETTER DOTLESS J]
513 '\u{0249}' | // ɉ [LATIN SMALL LETTER J WITH STROKE]
514 '\u{025F}' | // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
515 '\u{0284}' | // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
516 '\u{029D}' | // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
517 '\u{24D9}' | // ⓙ [CIRCLED LATIN SMALL LETTER J]
518 '\u{2C7C}' | // ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
519 '\u{FF4A}' // j [FULLWIDTH LATIN SMALL LETTER J]
520 => Some("j"),
521 '\u{24A5}' // ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
522 => Some("(j)"),
523 '\u{0136}' | // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
524 '\u{0198}' | // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
525 '\u{01E8}' | // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
526 '\u{1D0B}' | // ᴋ [LATIN LETTER SMALL CAPITAL K]
527 '\u{1E30}' | // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
528 '\u{1E32}' | // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
529 '\u{1E34}' | // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
530 '\u{24C0}' | // Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
531 '\u{2C69}' | // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
532 '\u{A740}' | // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
533 '\u{A742}' | // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
534 '\u{A744}' | // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
535 '\u{FF2B}' // K [FULLWIDTH LATIN CAPITAL LETTER K]
536 => Some("K"),
537 '\u{0137}' | // ķ [LATIN SMALL LETTER K WITH CEDILLA]
538 '\u{0199}' | // ƙ [LATIN SMALL LETTER K WITH HOOK]
539 '\u{01E9}' | // ǩ [LATIN SMALL LETTER K WITH CARON]
540 '\u{029E}' | // ʞ [LATIN SMALL LETTER TURNED K]
541 '\u{1D84}' | // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
542 '\u{1E31}' | // ḱ [LATIN SMALL LETTER K WITH ACUTE]
543 '\u{1E33}' | // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
544 '\u{1E35}' | // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
545 '\u{24DA}' | // ⓚ [CIRCLED LATIN SMALL LETTER K]
546 '\u{2C6A}' | // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
547 '\u{A741}' | // ꝁ [LATIN SMALL LETTER K WITH STROKE]
548 '\u{A743}' | // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
549 '\u{A745}' | // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
550 '\u{FF4B}' // k [FULLWIDTH LATIN SMALL LETTER K]
551 => Some("k"),
552 '\u{24A6}' // ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
553 => Some("(k)"),
554 '\u{0139}' | // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
555 '\u{013B}' | // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
556 '\u{013D}' | // Ľ [LATIN CAPITAL LETTER L WITH CARON]
557 '\u{013F}' | // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
558 '\u{0141}' | // Ł [LATIN CAPITAL LETTER L WITH STROKE]
559 '\u{023D}' | // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
560 '\u{029F}' | // ʟ [LATIN LETTER SMALL CAPITAL L]
561 '\u{1D0C}' | // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
562 '\u{1E36}' | // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
563 '\u{1E38}' | // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
564 '\u{1E3A}' | // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
565 '\u{1E3C}' | // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
566 '\u{24C1}' | // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
567 '\u{2C60}' | // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
568 '\u{2C62}' | // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
569 '\u{A746}' | // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
570 '\u{A748}' | // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
571 '\u{A780}' | // Ꞁ [LATIN CAPITAL LETTER TURNED L]
572 '\u{FF2C}' // L [FULLWIDTH LATIN CAPITAL LETTER L]
573 => Some("L"),
574 '\u{013A}' | // ĺ [LATIN SMALL LETTER L WITH ACUTE]
575 '\u{013C}' | // ļ [LATIN SMALL LETTER L WITH CEDILLA]
576 '\u{013E}' | // ľ [LATIN SMALL LETTER L WITH CARON]
577 '\u{0140}' | // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
578 '\u{0142}' | // ł [LATIN SMALL LETTER L WITH STROKE]
579 '\u{019A}' | // ƚ [LATIN SMALL LETTER L WITH BAR]
580 '\u{0234}' | // ȴ [LATIN SMALL LETTER L WITH CURL]
581 '\u{026B}' | // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
582 '\u{026C}' | // ɬ [LATIN SMALL LETTER L WITH BELT]
583 '\u{026D}' | // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
584 '\u{1D85}' | // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
585 '\u{1E37}' | // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
586 '\u{1E39}' | // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
587 '\u{1E3B}' | // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
588 '\u{1E3D}' | // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
589 '\u{24DB}' | // ⓛ [CIRCLED LATIN SMALL LETTER L]
590 '\u{2C61}' | // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
591 '\u{A747}' | // ꝇ [LATIN SMALL LETTER BROKEN L]
592 '\u{A749}' | // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
593 '\u{A781}' | // ꞁ [LATIN SMALL LETTER TURNED L]
594 '\u{FF4C}' // l [FULLWIDTH LATIN SMALL LETTER L]
595 => Some("l"),
596 '\u{01C7}' // LJ [LATIN CAPITAL LETTER LJ]
597 => Some("LJ"),
598 '\u{1EFA}' // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
599 => Some("LL"),
600 '\u{01C8}' // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
601 => Some("Lj"),
602 '\u{24A7}' // ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
603 => Some("(l)"),
604 '\u{01C9}' // lj [LATIN SMALL LETTER LJ]
605 => Some("lj"),
606 '\u{1EFB}' // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
607 => Some("ll"),
608 '\u{02AA}' // ʪ [LATIN SMALL LETTER LS DIGRAPH]
609 => Some("ls"),
610 '\u{02AB}' // ʫ [LATIN SMALL LETTER LZ DIGRAPH]
611 => Some("lz"),
612 '\u{019C}' | // Ɯ [LATIN CAPITAL LETTER TURNED M]
613 '\u{1D0D}' | // ᴍ [LATIN LETTER SMALL CAPITAL M]
614 '\u{1E3E}' | // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
615 '\u{1E40}' | // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
616 '\u{1E42}' | // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
617 '\u{24C2}' | // Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
618 '\u{2C6E}' | // Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
619 '\u{A7FD}' | // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
620 '\u{A7FF}' | // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
621 '\u{FF2D}' // M [FULLWIDTH LATIN CAPITAL LETTER M]
622 => Some("M"),
623 '\u{026F}' | // ɯ [LATIN SMALL LETTER TURNED M]
624 '\u{0270}' | // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
625 '\u{0271}' | // ɱ [LATIN SMALL LETTER M WITH HOOK]
626 '\u{1D6F}' | // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
627 '\u{1D86}' | // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
628 '\u{1E3F}' | // ḿ [LATIN SMALL LETTER M WITH ACUTE]
629 '\u{1E41}' | // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
630 '\u{1E43}' | // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
631 '\u{24DC}' | // ⓜ [CIRCLED LATIN SMALL LETTER M]
632 '\u{FF4D}' // m [FULLWIDTH LATIN SMALL LETTER M]
633 => Some("m"),
634 '\u{24A8}' // ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
635 => Some("(m)"),
636 '\u{00D1}' | // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
637 '\u{0143}' | // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
638 '\u{0145}' | // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
639 '\u{0147}' | // Ň [LATIN CAPITAL LETTER N WITH CARON]
640 '\u{014A}' | // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
641 '\u{019D}' | // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
642 '\u{01F8}' | // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
643 '\u{0220}' | // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
644 '\u{0274}' | // ɴ [LATIN LETTER SMALL CAPITAL N]
645 '\u{1D0E}' | // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
646 '\u{1E44}' | // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
647 '\u{1E46}' | // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
648 '\u{1E48}' | // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
649 '\u{1E4A}' | // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
650 '\u{24C3}' | // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
651 '\u{FF2E}' // N [FULLWIDTH LATIN CAPITAL LETTER N]
652 => Some("N"),
653 '\u{00F1}' | // ñ [LATIN SMALL LETTER N WITH TILDE]
654 '\u{0144}' | // ń [LATIN SMALL LETTER N WITH ACUTE]
655 '\u{0146}' | // ņ [LATIN SMALL LETTER N WITH CEDILLA]
656 '\u{0148}' | // ň [LATIN SMALL LETTER N WITH CARON]
657 '\u{0149}' | // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
658 '\u{014B}' | // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
659 '\u{019E}' | // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
660 '\u{01F9}' | // ǹ [LATIN SMALL LETTER N WITH GRAVE]
661 '\u{0235}' | // ȵ [LATIN SMALL LETTER N WITH CURL]
662 '\u{0272}' | // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
663 '\u{0273}' | // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
664 '\u{1D70}' | // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
665 '\u{1D87}' | // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
666 '\u{1E45}' | // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
667 '\u{1E47}' | // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
668 '\u{1E49}' | // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
669 '\u{1E4B}' | // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
670 '\u{207F}' | // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
671 '\u{24DD}' | // ⓝ [CIRCLED LATIN SMALL LETTER N]
672 '\u{FF4E}' // n [FULLWIDTH LATIN SMALL LETTER N]
673 => Some("n"),
674 '\u{01CA}' // NJ [LATIN CAPITAL LETTER NJ]
675 => Some("NJ"),
676 '\u{01CB}' // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
677 => Some("Nj"),
678 '\u{24A9}' // ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
679 => Some("(n)"),
680 '\u{01CC}' // nj [LATIN SMALL LETTER NJ]
681 => Some("nj"),
682 '\u{00D2}' | // Ò [LATIN CAPITAL LETTER O WITH GRAVE]
683 '\u{00D3}' | // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
684 '\u{00D4}' | // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
685 '\u{00D5}' | // Õ [LATIN CAPITAL LETTER O WITH TILDE]
686 '\u{00D6}' | // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
687 '\u{00D8}' | // Ø [LATIN CAPITAL LETTER O WITH STROKE]
688 '\u{014C}' | // Ō [LATIN CAPITAL LETTER O WITH MACRON]
689 '\u{014E}' | // Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
690 '\u{0150}' | // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
691 '\u{0186}' | // Ɔ [LATIN CAPITAL LETTER OPEN O]
692 '\u{019F}' | // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
693 '\u{01A0}' | // Ơ [LATIN CAPITAL LETTER O WITH HORN]
694 '\u{01D1}' | // Ǒ [LATIN CAPITAL LETTER O WITH CARON]
695 '\u{01EA}' | // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
696 '\u{01EC}' | // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
697 '\u{01FE}' | // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
698 '\u{020C}' | // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
699 '\u{020E}' | // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
700 '\u{022A}' | // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
701 '\u{022C}' | // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
702 '\u{022E}' | // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
703 '\u{0230}' | // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
704 '\u{1D0F}' | // ᴏ [LATIN LETTER SMALL CAPITAL O]
705 '\u{1D10}' | // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
706 '\u{1E4C}' | // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
707 '\u{1E4E}' | // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
708 '\u{1E50}' | // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
709 '\u{1E52}' | // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
710 '\u{1ECC}' | // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
711 '\u{1ECE}' | // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
712 '\u{1ED0}' | // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
713 '\u{1ED2}' | // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
714 '\u{1ED4}' | // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
715 '\u{1ED6}' | // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
716 '\u{1ED8}' | // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
717 '\u{1EDA}' | // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
718 '\u{1EDC}' | // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
719 '\u{1EDE}' | // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
720 '\u{1EE0}' | // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
721 '\u{1EE2}' | // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
722 '\u{24C4}' | // Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
723 '\u{A74A}' | // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
724 '\u{A74C}' | // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
725 '\u{FF2F}' // O [FULLWIDTH LATIN CAPITAL LETTER O]
726 => Some("O"),
727 '\u{00F2}' | // ò [LATIN SMALL LETTER O WITH GRAVE]
728 '\u{00F3}' | // ó [LATIN SMALL LETTER O WITH ACUTE]
729 '\u{00F4}' | // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
730 '\u{00F5}' | // õ [LATIN SMALL LETTER O WITH TILDE]
731 '\u{00F6}' | // ö [LATIN SMALL LETTER O WITH DIAERESIS]
732 '\u{00F8}' | // ø [LATIN SMALL LETTER O WITH STROKE]
733 '\u{014D}' | // ō [LATIN SMALL LETTER O WITH MACRON]
734 '\u{014F}' | // ŏ [LATIN SMALL LETTER O WITH BREVE]
735 '\u{0151}' | // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
736 '\u{01A1}' | // ơ [LATIN SMALL LETTER O WITH HORN]
737 '\u{01D2}' | // ǒ [LATIN SMALL LETTER O WITH CARON]
738 '\u{01EB}' | // ǫ [LATIN SMALL LETTER O WITH OGONEK]
739 '\u{01ED}' | // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
740 '\u{01FF}' | // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
741 '\u{020D}' | // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
742 '\u{020F}' | // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
743 '\u{022B}' | // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
744 '\u{022D}' | // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
745 '\u{022F}' | // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
746 '\u{0231}' | // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
747 '\u{0254}' | // ɔ [LATIN SMALL LETTER OPEN O]
748 '\u{0275}' | // ɵ [LATIN SMALL LETTER BARRED O]
749 '\u{1D16}' | // ᴖ [LATIN SMALL LETTER TOP HALF O]
750 '\u{1D17}' | // ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
751 '\u{1D97}' | // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
752 '\u{1E4D}' | // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
753 '\u{1E4F}' | // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
754 '\u{1E51}' | // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
755 '\u{1E53}' | // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
756 '\u{1ECD}' | // ọ [LATIN SMALL LETTER O WITH DOT BELOW]
757 '\u{1ECF}' | // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
758 '\u{1ED1}' | // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
759 '\u{1ED3}' | // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
760 '\u{1ED5}' | // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
761 '\u{1ED7}' | // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
762 '\u{1ED9}' | // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
763 '\u{1EDB}' | // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
764 '\u{1EDD}' | // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
765 '\u{1EDF}' | // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
766 '\u{1EE1}' | // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
767 '\u{1EE3}' | // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
768 '\u{2092}' | // ₒ [LATIN SUBSCRIPT SMALL LETTER O]
769 '\u{24DE}' | // ⓞ [CIRCLED LATIN SMALL LETTER O]
770 '\u{2C7A}' | // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
771 '\u{A74B}' | // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
772 '\u{A74D}' | // ꝍ [LATIN SMALL LETTER O WITH LOOP]
773 '\u{FF4F}' // o [FULLWIDTH LATIN SMALL LETTER O]
774 => Some("o"),
775 '\u{0152}' | // Œ [LATIN CAPITAL LIGATURE OE]
776 '\u{0276}' // ɶ [LATIN LETTER SMALL CAPITAL OE]
777 => Some("OE"),
778 '\u{A74E}' // Ꝏ [LATIN CAPITAL LETTER OO]
779 => Some("OO"),
780 '\u{0222}' | // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
781 '\u{1D15}' // ᴕ [LATIN LETTER SMALL CAPITAL OU]
782 => Some("OU"),
783 '\u{24AA}' // ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
784 => Some("(o)"),
785 '\u{0153}' | // œ [LATIN SMALL LIGATURE OE]
786 '\u{1D14}' // ᴔ [LATIN SMALL LETTER TURNED OE]
787 => Some("oe"),
788 '\u{A74F}' // ꝏ [LATIN SMALL LETTER OO]
789 => Some("oo"),
790 '\u{0223}' // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
791 => Some("ou"),
792 '\u{01A4}' | // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
793 '\u{1D18}' | // ᴘ [LATIN LETTER SMALL CAPITAL P]
794 '\u{1E54}' | // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
795 '\u{1E56}' | // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
796 '\u{24C5}' | // Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
797 '\u{2C63}' | // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
798 '\u{A750}' | // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
799 '\u{A752}' | // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
800 '\u{A754}' | // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
801 '\u{FF30}' // P [FULLWIDTH LATIN CAPITAL LETTER P]
802 => Some("P"),
803 '\u{01A5}' | // ƥ [LATIN SMALL LETTER P WITH HOOK]
804 '\u{1D71}' | // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
805 '\u{1D7D}' | // ᵽ [LATIN SMALL LETTER P WITH STROKE]
806 '\u{1D88}' | // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
807 '\u{1E55}' | // ṕ [LATIN SMALL LETTER P WITH ACUTE]
808 '\u{1E57}' | // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
809 '\u{24DF}' | // ⓟ [CIRCLED LATIN SMALL LETTER P]
810 '\u{A751}' | // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
811 '\u{A753}' | // ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
812 '\u{A755}' | // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
813 '\u{A7FC}' | // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
814 '\u{FF50}' // p [FULLWIDTH LATIN SMALL LETTER P]
815 => Some("p"),
816 '\u{24AB}' // ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
817 => Some("(p)"),
818 '\u{024A}' | // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
819 '\u{24C6}' | // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
820 '\u{A756}' | // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
821 '\u{A758}' | // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
822 '\u{FF31}' // Q [FULLWIDTH LATIN CAPITAL LETTER Q]
823 => Some("Q"),
824 '\u{0138}' | // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
825 '\u{024B}' | // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
826 '\u{02A0}' | // ʠ [LATIN SMALL LETTER Q WITH HOOK]
827 '\u{24E0}' | // ⓠ [CIRCLED LATIN SMALL LETTER Q]
828 '\u{A757}' | // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
829 '\u{A759}' | // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
830 '\u{FF51}' // q [FULLWIDTH LATIN SMALL LETTER Q]
831 => Some("q"),
832 '\u{24AC}' // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
833 => Some("(q)"),
834 '\u{0239}' // ȹ [LATIN SMALL LETTER QP DIGRAPH]
835 => Some("qp"),
836 '\u{0154}' | // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
837 '\u{0156}' | // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
838 '\u{0158}' | // Ř [LATIN CAPITAL LETTER R WITH CARON]
839 '\u{0210}' | // Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
840 '\u{0212}' | // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
841 '\u{024C}' | // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
842 '\u{0280}' | // ʀ [LATIN LETTER SMALL CAPITAL R]
843 '\u{0281}' | // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
844 '\u{1D19}' | // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
845 '\u{1D1A}' | // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
846 '\u{1E58}' | // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
847 '\u{1E5A}' | // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
848 '\u{1E5C}' | // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
849 '\u{1E5E}' | // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
850 '\u{24C7}' | // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
851 '\u{2C64}' | // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
852 '\u{A75A}' | // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
853 '\u{A782}' | // Ꞃ [LATIN CAPITAL LETTER INSULAR R]
854 '\u{FF32}' // R [FULLWIDTH LATIN CAPITAL LETTER R]
855 => Some("R"),
856 '\u{0155}' | // ŕ [LATIN SMALL LETTER R WITH ACUTE]
857 '\u{0157}' | // ŗ [LATIN SMALL LETTER R WITH CEDILLA]
858 '\u{0159}' | // ř [LATIN SMALL LETTER R WITH CARON]
859 '\u{0211}' | // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
860 '\u{0213}' | // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
861 '\u{024D}' | // ɍ [LATIN SMALL LETTER R WITH STROKE]
862 '\u{027C}' | // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
863 '\u{027D}' | // ɽ [LATIN SMALL LETTER R WITH TAIL]
864 '\u{027E}' | // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
865 '\u{027F}' | // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
866 '\u{1D63}' | // ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
867 '\u{1D72}' | // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
868 '\u{1D73}' | // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
869 '\u{1D89}' | // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
870 '\u{1E59}' | // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
871 '\u{1E5B}' | // ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
872 '\u{1E5D}' | // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
873 '\u{1E5F}' | // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
874 '\u{24E1}' | // ⓡ [CIRCLED LATIN SMALL LETTER R]
875 '\u{A75B}' | // ꝛ [LATIN SMALL LETTER R ROTUNDA]
876 '\u{A783}' | // ꞃ [LATIN SMALL LETTER INSULAR R]
877 '\u{FF52}' // r [FULLWIDTH LATIN SMALL LETTER R]
878 => Some("r"),
879 '\u{24AD}' // ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
880 => Some("(r)"),
881 '\u{015A}' | // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
882 '\u{015C}' | // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
883 '\u{015E}' | // Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
884 '\u{0160}' | // Š [LATIN CAPITAL LETTER S WITH CARON]
885 '\u{0218}' | // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
886 '\u{1E60}' | // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
887 '\u{1E62}' | // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
888 '\u{1E64}' | // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
889 '\u{1E66}' | // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
890 '\u{1E68}' | // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
891 '\u{24C8}' | // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
892 '\u{A731}' | // ꜱ [LATIN LETTER SMALL CAPITAL S]
893 '\u{A785}' | // ꞅ [LATIN SMALL LETTER INSULAR S]
894 '\u{FF33}' // S [FULLWIDTH LATIN CAPITAL LETTER S]
895 => Some("S"),
896 '\u{015B}' | // ś [LATIN SMALL LETTER S WITH ACUTE]
897 '\u{015D}' | // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
898 '\u{015F}' | // ş [LATIN SMALL LETTER S WITH CEDILLA]
899 '\u{0161}' | // š [LATIN SMALL LETTER S WITH CARON]
900 '\u{017F}' | // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
901 '\u{0219}' | // ș [LATIN SMALL LETTER S WITH COMMA BELOW]
902 '\u{023F}' | // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
903 '\u{0282}' | // ʂ [LATIN SMALL LETTER S WITH HOOK]
904 '\u{1D74}' | // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
905 '\u{1D8A}' | // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
906 '\u{1E61}' | // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
907 '\u{1E63}' | // ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
908 '\u{1E65}' | // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
909 '\u{1E67}' | // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
910 '\u{1E69}' | // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
911 '\u{1E9C}' | // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
912 '\u{1E9D}' | // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
913 '\u{24E2}' | // ⓢ [CIRCLED LATIN SMALL LETTER S]
914 '\u{A784}' | // Ꞅ [LATIN CAPITAL LETTER INSULAR S]
915 '\u{FF53}' // s [FULLWIDTH LATIN SMALL LETTER S]
916 => Some("s"),
917 '\u{1E9E}' // ẞ [LATIN CAPITAL LETTER SHARP S]
918 => Some("SS"),
919 '\u{24AE}' // ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
920 => Some("(s)"),
921 '\u{00DF}' // ß [LATIN SMALL LETTER SHARP S]
922 => Some("ss"),
923 '\u{FB06}' // st [LATIN SMALL LIGATURE ST]
924 => Some("st"),
925 '\u{0162}' | // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
926 '\u{0164}' | // Ť [LATIN CAPITAL LETTER T WITH CARON]
927 '\u{0166}' | // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
928 '\u{01AC}' | // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
929 '\u{01AE}' | // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
930 '\u{021A}' | // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
931 '\u{023E}' | // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
932 '\u{1D1B}' | // ᴛ [LATIN LETTER SMALL CAPITAL T]
933 '\u{1E6A}' | // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
934 '\u{1E6C}' | // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
935 '\u{1E6E}' | // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
936 '\u{1E70}' | // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
937 '\u{24C9}' | // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
938 '\u{A786}' | // Ꞇ [LATIN CAPITAL LETTER INSULAR T]
939 '\u{FF34}' // T [FULLWIDTH LATIN CAPITAL LETTER T]
940 => Some("T"),
941 '\u{0163}' | // ţ [LATIN SMALL LETTER T WITH CEDILLA]
942 '\u{0165}' | // ť [LATIN SMALL LETTER T WITH CARON]
943 '\u{0167}' | // ŧ [LATIN SMALL LETTER T WITH STROKE]
944 '\u{01AB}' | // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
945 '\u{01AD}' | // ƭ [LATIN SMALL LETTER T WITH HOOK]
946 '\u{021B}' | // ț [LATIN SMALL LETTER T WITH COMMA BELOW]
947 '\u{0236}' | // ȶ [LATIN SMALL LETTER T WITH CURL]
948 '\u{0287}' | // ʇ [LATIN SMALL LETTER TURNED T]
949 '\u{0288}' | // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
950 '\u{1D75}' | // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
951 '\u{1E6B}' | // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
952 '\u{1E6D}' | // ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
953 '\u{1E6F}' | // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
954 '\u{1E71}' | // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
955 '\u{1E97}' | // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
956 '\u{24E3}' | // ⓣ [CIRCLED LATIN SMALL LETTER T]
957 '\u{2C66}' | // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
958 '\u{FF54}' // t [FULLWIDTH LATIN SMALL LETTER T]
959 => Some("t"),
960 '\u{00DE}' | // Þ [LATIN CAPITAL LETTER THORN]
961 '\u{A766}' // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
962 => Some("TH"),
963 '\u{A728}' // Ꜩ [LATIN CAPITAL LETTER TZ]
964 => Some("TZ"),
965 '\u{24AF}' // ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
966 => Some("(t)"),
967 '\u{02A8}' // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
968 => Some("tc"),
969 '\u{00FE}' | // þ [LATIN SMALL LETTER THORN]
970 '\u{1D7A}' | // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
971 '\u{A767}' // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
972 => Some("th"),
973 '\u{02A6}' // ʦ [LATIN SMALL LETTER TS DIGRAPH]
974 => Some("ts"),
975 '\u{A729}' // ꜩ [LATIN SMALL LETTER TZ]
976 => Some("tz"),
977 '\u{00D9}' | // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
978 '\u{00DA}' | // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
979 '\u{00DB}' | // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
980 '\u{00DC}' | // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
981 '\u{0168}' | // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
982 '\u{016A}' | // Ū [LATIN CAPITAL LETTER U WITH MACRON]
983 '\u{016C}' | // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
984 '\u{016E}' | // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
985 '\u{0170}' | // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
986 '\u{0172}' | // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
987 '\u{01AF}' | // Ư [LATIN CAPITAL LETTER U WITH HORN]
988 '\u{01D3}' | // Ǔ [LATIN CAPITAL LETTER U WITH CARON]
989 '\u{01D5}' | // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
990 '\u{01D7}' | // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
991 '\u{01D9}' | // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
992 '\u{01DB}' | // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
993 '\u{0214}' | // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
994 '\u{0216}' | // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
995 '\u{0244}' | // Ʉ [LATIN CAPITAL LETTER U BAR]
996 '\u{1D1C}' | // ᴜ [LATIN LETTER SMALL CAPITAL U]
997 '\u{1D7E}' | // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
998 '\u{1E72}' | // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
999 '\u{1E74}' | // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
1000 '\u{1E76}' | // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
1001 '\u{1E78}' | // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
1002 '\u{1E7A}' | // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
1003 '\u{1EE4}' | // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
1004 '\u{1EE6}' | // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
1005 '\u{1EE8}' | // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
1006 '\u{1EEA}' | // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
1007 '\u{1EEC}' | // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
1008 '\u{1EEE}' | // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
1009 '\u{1EF0}' | // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
1010 '\u{24CA}' | // Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
1011 '\u{FF35}' // U [FULLWIDTH LATIN CAPITAL LETTER U]
1012 => Some("U"),
1013 '\u{00F9}' | // ù [LATIN SMALL LETTER U WITH GRAVE]
1014 '\u{00FA}' | // ú [LATIN SMALL LETTER U WITH ACUTE]
1015 '\u{00FB}' | // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
1016 '\u{00FC}' | // ü [LATIN SMALL LETTER U WITH DIAERESIS]
1017 '\u{0169}' | // ũ [LATIN SMALL LETTER U WITH TILDE]
1018 '\u{016B}' | // ū [LATIN SMALL LETTER U WITH MACRON]
1019 '\u{016D}' | // ŭ [LATIN SMALL LETTER U WITH BREVE]
1020 '\u{016F}' | // ů [LATIN SMALL LETTER U WITH RING ABOVE]
1021 '\u{0171}' | // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
1022 '\u{0173}' | // ų [LATIN SMALL LETTER U WITH OGONEK]
1023 '\u{01B0}' | // ư [LATIN SMALL LETTER U WITH HORN]
1024 '\u{01D4}' | // ǔ [LATIN SMALL LETTER U WITH CARON]
1025 '\u{01D6}' | // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
1026 '\u{01D8}' | // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
1027 '\u{01DA}' | // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
1028 '\u{01DC}' | // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
1029 '\u{0215}' | // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
1030 '\u{0217}' | // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
1031 '\u{0289}' | // ʉ [LATIN SMALL LETTER U BAR]
1032 '\u{1D64}' | // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
1033 '\u{1D99}' | // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
1034 '\u{1E73}' | // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
1035 '\u{1E75}' | // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
1036 '\u{1E77}' | // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
1037 '\u{1E79}' | // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
1038 '\u{1E7B}' | // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
1039 '\u{1EE5}' | // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
1040 '\u{1EE7}' | // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
1041 '\u{1EE9}' | // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
1042 '\u{1EEB}' | // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
1043 '\u{1EED}' | // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
1044 '\u{1EEF}' | // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
1045 '\u{1EF1}' | // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
1046 '\u{24E4}' | // ⓤ [CIRCLED LATIN SMALL LETTER U]
1047 '\u{FF55}' // u [FULLWIDTH LATIN SMALL LETTER U]
1048 => Some("u"),
1049 '\u{24B0}' // ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
1050 => Some("(u)"),
1051 '\u{1D6B}' // ᵫ [LATIN SMALL LETTER UE]
1052 => Some("ue"),
1053 '\u{01B2}' | // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
1054 '\u{0245}' | // Ʌ [LATIN CAPITAL LETTER TURNED V]
1055 '\u{1D20}' | // ᴠ [LATIN LETTER SMALL CAPITAL V]
1056 '\u{1E7C}' | // Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
1057 '\u{1E7E}' | // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
1058 '\u{1EFC}' | // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
1059 '\u{24CB}' | // Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
1060 '\u{A75E}' | // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
1061 '\u{A768}' | // Ꝩ [LATIN CAPITAL LETTER VEND]
1062 '\u{FF36}' // V [FULLWIDTH LATIN CAPITAL LETTER V]
1063 => Some("V"),
1064 '\u{028B}' | // ʋ [LATIN SMALL LETTER V WITH HOOK]
1065 '\u{028C}' | // ʌ [LATIN SMALL LETTER TURNED V]
1066 '\u{1D65}' | // ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
1067 '\u{1D8C}' | // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
1068 '\u{1E7D}' | // ṽ [LATIN SMALL LETTER V WITH TILDE]
1069 '\u{1E7F}' | // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
1070 '\u{24E5}' | // ⓥ [CIRCLED LATIN SMALL LETTER V]
1071 '\u{2C71}' | // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
1072 '\u{2C74}' | // ⱴ [LATIN SMALL LETTER V WITH CURL]
1073 '\u{A75F}' | // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
1074 '\u{FF56}' // v [FULLWIDTH LATIN SMALL LETTER V]
1075 => Some("v"),
1076 '\u{A760}' // Ꝡ [LATIN CAPITAL LETTER VY]
1077 => Some("VY"),
1078 '\u{24B1}' // ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
1079 => Some("(v)"),
1080 '\u{A761}' // ꝡ [LATIN SMALL LETTER VY]
1081 => Some("vy"),
1082 '\u{0174}' | // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
1083 '\u{01F7}' | // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
1084 '\u{1D21}' | // ᴡ [LATIN LETTER SMALL CAPITAL W]
1085 '\u{1E80}' | // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
1086 '\u{1E82}' | // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
1087 '\u{1E84}' | // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
1088 '\u{1E86}' | // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
1089 '\u{1E88}' | // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
1090 '\u{24CC}' | // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
1091 '\u{2C72}' | // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
1092 '\u{FF37}' // W [FULLWIDTH LATIN CAPITAL LETTER W]
1093 => Some("W"),
1094 '\u{0175}' | // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
1095 '\u{01BF}' | // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
1096 '\u{028D}' | // ʍ [LATIN SMALL LETTER TURNED W]
1097 '\u{1E81}' | // ẁ [LATIN SMALL LETTER W WITH GRAVE]
1098 '\u{1E83}' | // ẃ [LATIN SMALL LETTER W WITH ACUTE]
1099 '\u{1E85}' | // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
1100 '\u{1E87}' | // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
1101 '\u{1E89}' | // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
1102 '\u{1E98}' | // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
1103 '\u{24E6}' | // ⓦ [CIRCLED LATIN SMALL LETTER W]
1104 '\u{2C73}' | // ⱳ [LATIN SMALL LETTER W WITH HOOK]
1105 '\u{FF57}' // w [FULLWIDTH LATIN SMALL LETTER W]
1106 => Some("w"),
1107 '\u{24B2}' // ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
1108 => Some("(w)"),
1109 '\u{1E8A}' | // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
1110 '\u{1E8C}' | // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
1111 '\u{24CD}' | // Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
1112 '\u{FF38}' // X [FULLWIDTH LATIN CAPITAL LETTER X]
1113 => Some("X"),
1114 '\u{1D8D}' | // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
1115 '\u{1E8B}' | // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
1116 '\u{1E8D}' | // ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
1117 '\u{2093}' | // ₓ [LATIN SUBSCRIPT SMALL LETTER X]
1118 '\u{24E7}' | // ⓧ [CIRCLED LATIN SMALL LETTER X]
1119 '\u{FF58}' // x [FULLWIDTH LATIN SMALL LETTER X]
1120 => Some("x"),
1121 '\u{24B3}' // ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
1122 => Some("(x)"),
1123 '\u{00DD}' | // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
1124 '\u{0176}' | // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
1125 '\u{0178}' | // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
1126 '\u{01B3}' | // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
1127 '\u{0232}' | // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
1128 '\u{024E}' | // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
1129 '\u{028F}' | // ʏ [LATIN LETTER SMALL CAPITAL Y]
1130 '\u{1E8E}' | // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
1131 '\u{1EF2}' | // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
1132 '\u{1EF4}' | // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
1133 '\u{1EF6}' | // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
1134 '\u{1EF8}' | // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
1135 '\u{1EFE}' | // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
1136 '\u{24CE}' | // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
1137 '\u{FF39}' // Y [FULLWIDTH LATIN CAPITAL LETTER Y]
1138 => Some("Y"),
1139 '\u{00FD}' | // ý [LATIN SMALL LETTER Y WITH ACUTE]
1140 '\u{00FF}' | // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
1141 '\u{0177}' | // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
1142 '\u{01B4}' | // ƴ [LATIN SMALL LETTER Y WITH HOOK]
1143 '\u{0233}' | // ȳ [LATIN SMALL LETTER Y WITH MACRON]
1144 '\u{024F}' | // ɏ [LATIN SMALL LETTER Y WITH STROKE]
1145 '\u{028E}' | // ʎ [LATIN SMALL LETTER TURNED Y]
1146 '\u{1E8F}' | // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
1147 '\u{1E99}' | // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
1148 '\u{1EF3}' | // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
1149 '\u{1EF5}' | // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
1150 '\u{1EF7}' | // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
1151 '\u{1EF9}' | // ỹ [LATIN SMALL LETTER Y WITH TILDE]
1152 '\u{1EFF}' | // ỿ [LATIN SMALL LETTER Y WITH LOOP]
1153 '\u{24E8}' | // ⓨ [CIRCLED LATIN SMALL LETTER Y]
1154 '\u{FF59}' // y [FULLWIDTH LATIN SMALL LETTER Y]
1155 => Some("y"),
1156 '\u{24B4}' // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
1157 => Some("(y)"),
1158 '\u{0179}' | // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
1159 '\u{017B}' | // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
1160 '\u{017D}' | // Ž [LATIN CAPITAL LETTER Z WITH CARON]
1161 '\u{01B5}' | // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
1162 '\u{021C}' | // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
1163 '\u{0224}' | // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
1164 '\u{1D22}' | // ᴢ [LATIN LETTER SMALL CAPITAL Z]
1165 '\u{1E90}' | // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
1166 '\u{1E92}' | // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
1167 '\u{1E94}' | // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
1168 '\u{24CF}' | // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
1169 '\u{2C6B}' | // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
1170 '\u{A762}' | // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
1171 '\u{FF3A}' // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
1172 => Some("Z"),
1173 '\u{017A}' | // ź [LATIN SMALL LETTER Z WITH ACUTE]
1174 '\u{017C}' | // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
1175 '\u{017E}' | // ž [LATIN SMALL LETTER Z WITH CARON]
1176 '\u{01B6}' | // ƶ [LATIN SMALL LETTER Z WITH STROKE]
1177 '\u{021D}' | // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
1178 '\u{0225}' | // ȥ [LATIN SMALL LETTER Z WITH HOOK]
1179 '\u{0240}' | // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
1180 '\u{0290}' | // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
1181 '\u{0291}' | // ʑ [LATIN SMALL LETTER Z WITH CURL]
1182 '\u{1D76}' | // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
1183 '\u{1D8E}' | // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
1184 '\u{1E91}' | // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
1185 '\u{1E93}' | // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
1186 '\u{1E95}' | // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
1187 '\u{24E9}' | // ⓩ [CIRCLED LATIN SMALL LETTER Z]
1188 '\u{2C6C}' | // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
1189 '\u{A763}' | // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
1190 '\u{FF5A}' // z [FULLWIDTH LATIN SMALL LETTER Z]
1191 => Some("z"),
1192 '\u{24B5}' // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
1193 => Some("(z)"),
1194 '\u{2070}' | // ⁰ [SUPERSCRIPT ZERO]
1195 '\u{2080}' | // ₀ [SUBSCRIPT ZERO]
1196 '\u{24EA}' | // ⓪ [CIRCLED DIGIT ZERO]
1197 '\u{24FF}' | // ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
1198 '\u{FF10}' // 0 [FULLWIDTH DIGIT ZERO]
1199 => Some("0"),
1200 '\u{00B9}' | // ¹ [SUPERSCRIPT ONE]
1201 '\u{2081}' | // ₁ [SUBSCRIPT ONE]
1202 '\u{2460}' | // ① [CIRCLED DIGIT ONE]
1203 '\u{24F5}' | // ⓵ [DOUBLE CIRCLED DIGIT ONE]
1204 '\u{2776}' | // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
1205 '\u{2780}' | // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
1206 '\u{278A}' | // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
1207 '\u{FF11}' // 1 [FULLWIDTH DIGIT ONE]
1208 => Some("1"),
1209 '\u{2488}' // ⒈ [DIGIT ONE FULL STOP]
1210 => Some("1."),
1211 '\u{2474}' // ⑴ [PARENTHESIZED DIGIT ONE]
1212 => Some("(1)"),
1213 '\u{00B2}' | // ² [SUPERSCRIPT TWO]
1214 '\u{2082}' | // ₂ [SUBSCRIPT TWO]
1215 '\u{2461}' | // ② [CIRCLED DIGIT TWO]
1216 '\u{24F6}' | // ⓶ [DOUBLE CIRCLED DIGIT TWO]
1217 '\u{2777}' | // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
1218 '\u{2781}' | // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
1219 '\u{278B}' | // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
1220 '\u{FF12}' // 2 [FULLWIDTH DIGIT TWO]
1221 => Some("2"),
1222 '\u{2489}' // ⒉ [DIGIT TWO FULL STOP]
1223 => Some("2."),
1224 '\u{2475}' // ⑵ [PARENTHESIZED DIGIT TWO]
1225 => Some("(2)"),
1226 '\u{00B3}' | // ³ [SUPERSCRIPT THREE]
1227 '\u{2083}' | // ₃ [SUBSCRIPT THREE]
1228 '\u{2462}' | // ③ [CIRCLED DIGIT THREE]
1229 '\u{24F7}' | // ⓷ [DOUBLE CIRCLED DIGIT THREE]
1230 '\u{2778}' | // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
1231 '\u{2782}' | // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
1232 '\u{278C}' | // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
1233 '\u{FF13}' // 3 [FULLWIDTH DIGIT THREE]
1234 => Some("3"),
1235 '\u{248A}' // ⒊ [DIGIT THREE FULL STOP]
1236 => Some("3."),
1237 '\u{2476}' // ⑶ [PARENTHESIZED DIGIT THREE]
1238 => Some("(3)"),
1239 '\u{2074}' | // ⁴ [SUPERSCRIPT FOUR]
1240 '\u{2084}' | // ₄ [SUBSCRIPT FOUR]
1241 '\u{2463}' | // ④ [CIRCLED DIGIT FOUR]
1242 '\u{24F8}' | // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
1243 '\u{2779}' | // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
1244 '\u{2783}' | // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
1245 '\u{278D}' | // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
1246 '\u{FF14}' // 4 [FULLWIDTH DIGIT FOUR]
1247 => Some("4"),
1248 '\u{248B}' // ⒋ [DIGIT FOUR FULL STOP]
1249 => Some("4."),
1250 '\u{2477}' // ⑷ [PARENTHESIZED DIGIT FOUR]
1251 => Some("(4)"),
1252 '\u{2075}' | // ⁵ [SUPERSCRIPT FIVE]
1253 '\u{2085}' | // ₅ [SUBSCRIPT FIVE]
1254 '\u{2464}' | // ⑤ [CIRCLED DIGIT FIVE]
1255 '\u{24F9}' | // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
1256 '\u{277A}' | // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
1257 '\u{2784}' | // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
1258 '\u{278E}' | // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
1259 '\u{FF15}' // 5 [FULLWIDTH DIGIT FIVE]
1260 => Some("5"),
1261 '\u{248C}' // ⒌ [DIGIT FIVE FULL STOP]
1262 => Some("5."),
1263 '\u{2478}' // ⑸ [PARENTHESIZED DIGIT FIVE]
1264 => Some("(5)"),
1265 '\u{2076}' | // ⁶ [SUPERSCRIPT SIX]
1266 '\u{2086}' | // ₆ [SUBSCRIPT SIX]
1267 '\u{2465}' | // ⑥ [CIRCLED DIGIT SIX]
1268 '\u{24FA}' | // ⓺ [DOUBLE CIRCLED DIGIT SIX]
1269 '\u{277B}' | // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
1270 '\u{2785}' | // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
1271 '\u{278F}' | // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
1272 '\u{FF16}' // 6 [FULLWIDTH DIGIT SIX]
1273 => Some("6"),
1274 '\u{248D}' // ⒍ [DIGIT SIX FULL STOP]
1275 => Some("6."),
1276 '\u{2479}' // ⑹ [PARENTHESIZED DIGIT SIX]
1277 => Some("(6)"),
1278 '\u{2077}' | // ⁷ [SUPERSCRIPT SEVEN]
1279 '\u{2087}' | // ₇ [SUBSCRIPT SEVEN]
1280 '\u{2466}' | // ⑦ [CIRCLED DIGIT SEVEN]
1281 '\u{24FB}' | // ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
1282 '\u{277C}' | // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
1283 '\u{2786}' | // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
1284 '\u{2790}' | // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
1285 '\u{FF17}' // 7 [FULLWIDTH DIGIT SEVEN]
1286 => Some("7"),
1287 '\u{248E}' // ⒎ [DIGIT SEVEN FULL STOP]
1288 => Some("7."),
1289 '\u{247A}' // ⑺ [PARENTHESIZED DIGIT SEVEN]
1290 => Some("(7)"),
1291 '\u{2078}' | // ⁸ [SUPERSCRIPT EIGHT]
1292 '\u{2088}' | // ₈ [SUBSCRIPT EIGHT]
1293 '\u{2467}' | // ⑧ [CIRCLED DIGIT EIGHT]
1294 '\u{24FC}' | // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
1295 '\u{277D}' | // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
1296 '\u{2787}' | // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
1297 '\u{2791}' | // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
1298 '\u{FF18}' // 8 [FULLWIDTH DIGIT EIGHT]
1299 => Some("8"),
1300 '\u{248F}' // ⒏ [DIGIT EIGHT FULL STOP]
1301 => Some("8."),
1302 '\u{247B}' // ⑻ [PARENTHESIZED DIGIT EIGHT]
1303 => Some("(8)"),
1304 '\u{2079}' | // ⁹ [SUPERSCRIPT NINE]
1305 '\u{2089}' | // ₉ [SUBSCRIPT NINE]
1306 '\u{2468}' | // ⑨ [CIRCLED DIGIT NINE]
1307 '\u{24FD}' | // ⓽ [DOUBLE CIRCLED DIGIT NINE]
1308 '\u{277E}' | // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
1309 '\u{2788}' | // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
1310 '\u{2792}' | // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
1311 '\u{FF19}' // 9 [FULLWIDTH DIGIT NINE]
1312 => Some("9"),
1313 '\u{2490}' // ⒐ [DIGIT NINE FULL STOP]
1314 => Some("9."),
1315 '\u{247C}' // ⑼ [PARENTHESIZED DIGIT NINE]
1316 => Some("(9)"),
1317 '\u{2469}' | // ⑩ [CIRCLED NUMBER TEN]
1318 '\u{24FE}' | // ⓾ [DOUBLE CIRCLED NUMBER TEN]
1319 '\u{277F}' | // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
1320 '\u{2789}' | // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
1321 '\u{2793}' // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
1322 => Some("10"),
1323 '\u{2491}' // ⒑ [NUMBER TEN FULL STOP]
1324 => Some("10."),
1325 '\u{247D}' // ⑽ [PARENTHESIZED NUMBER TEN]
1326 => Some("(10)"),
1327 '\u{246A}' | // ⑪ [CIRCLED NUMBER ELEVEN]
1328 '\u{24EB}' // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
1329 => Some("11"),
1330 '\u{2492}' // ⒒ [NUMBER ELEVEN FULL STOP]
1331 => Some("11."),
1332 '\u{247E}' // ⑾ [PARENTHESIZED NUMBER ELEVEN]
1333 => Some("(11)"),
1334 '\u{246B}' | // ⑫ [CIRCLED NUMBER TWELVE]
1335 '\u{24EC}' // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
1336 => Some("12"),
1337 '\u{2493}' // ⒓ [NUMBER TWELVE FULL STOP]
1338 => Some("12."),
1339 '\u{247F}' // ⑿ [PARENTHESIZED NUMBER TWELVE]
1340 => Some("(12)"),
1341 '\u{246C}' | // ⑬ [CIRCLED NUMBER THIRTEEN]
1342 '\u{24ED}' // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
1343 => Some("13"),
1344 '\u{2494}' // ⒔ [NUMBER THIRTEEN FULL STOP]
1345 => Some("13."),
1346 '\u{2480}' // ⒀ [PARENTHESIZED NUMBER THIRTEEN]
1347 => Some("(13)"),
1348 '\u{246D}' | // ⑭ [CIRCLED NUMBER FOURTEEN]
1349 '\u{24EE}' // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
1350 => Some("14"),
1351 '\u{2495}' // ⒕ [NUMBER FOURTEEN FULL STOP]
1352 => Some("14."),
1353 '\u{2481}' // ⒁ [PARENTHESIZED NUMBER FOURTEEN]
1354 => Some("(14)"),
1355 '\u{246E}' | // ⑮ [CIRCLED NUMBER FIFTEEN]
1356 '\u{24EF}' // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
1357 => Some("15"),
1358 '\u{2496}' // ⒖ [NUMBER FIFTEEN FULL STOP]
1359 => Some("15."),
1360 '\u{2482}' // ⒂ [PARENTHESIZED NUMBER FIFTEEN]
1361 => Some("(15)"),
1362 '\u{246F}' | // ⑯ [CIRCLED NUMBER SIXTEEN]
1363 '\u{24F0}' // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
1364 => Some("16"),
1365 '\u{2497}' // ⒗ [NUMBER SIXTEEN FULL STOP]
1366 => Some("16."),
1367 '\u{2483}' // ⒃ [PARENTHESIZED NUMBER SIXTEEN]
1368 => Some("(16)"),
1369 '\u{2470}' | // ⑰ [CIRCLED NUMBER SEVENTEEN]
1370 '\u{24F1}' // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
1371 => Some("17"),
1372 '\u{2498}' // ⒘ [NUMBER SEVENTEEN FULL STOP]
1373 => Some("17."),
1374 '\u{2484}' // ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
1375 => Some("(17)"),
1376 '\u{2471}' | // ⑱ [CIRCLED NUMBER EIGHTEEN]
1377 '\u{24F2}' // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
1378 => Some("18"),
1379 '\u{2499}' // ⒙ [NUMBER EIGHTEEN FULL STOP]
1380 => Some("18."),
1381 '\u{2485}' // ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
1382 => Some("(18)"),
1383 '\u{2472}' | // ⑲ [CIRCLED NUMBER NINETEEN]
1384 '\u{24F3}' // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
1385 => Some("19"),
1386 '\u{249A}' // ⒚ [NUMBER NINETEEN FULL STOP]
1387 => Some("19."),
1388 '\u{2486}' // ⒆ [PARENTHESIZED NUMBER NINETEEN]
1389 => Some("(19)"),
1390 '\u{2473}' | // ⑳ [CIRCLED NUMBER TWENTY]
1391 '\u{24F4}' // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
1392 => Some("20"),
1393 '\u{249B}' // ⒛ [NUMBER TWENTY FULL STOP]
1394 => Some("20."),
1395 '\u{2487}' // ⒇ [PARENTHESIZED NUMBER TWENTY]
1396 => Some("(20)"),
1397 '\u{00AB}' | // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
1398 '\u{00BB}' | // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
1399 '\u{201C}' | // “ [LEFT DOUBLE QUOTATION MARK]
1400 '\u{201D}' | // ” [RIGHT DOUBLE QUOTATION MARK]
1401 '\u{201E}' | // „ [DOUBLE LOW-9 QUOTATION MARK]
1402 '\u{2033}' | // ″ [DOUBLE PRIME]
1403 '\u{2036}' | // ‶ [REVERSED DOUBLE PRIME]
1404 '\u{275D}' | // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
1405 '\u{275E}' | // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
1406 '\u{276E}' | // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1407 '\u{276F}' | // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1408 '\u{FF02}' // " [FULLWIDTH QUOTATION MARK]
1409 => Some("\""),
1410 '\u{2018}' | // ‘ [LEFT SINGLE QUOTATION MARK]
1411 '\u{2019}' | // ’ [RIGHT SINGLE QUOTATION MARK]
1412 '\u{201A}' | // ‚ [SINGLE LOW-9 QUOTATION MARK]
1413 '\u{201B}' | // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
1414 '\u{2032}' | // ′ [PRIME]
1415 '\u{2035}' | // ‵ [REVERSED PRIME]
1416 '\u{2039}' | // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
1417 '\u{203A}' | // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
1418 '\u{275B}' | // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
1419 '\u{275C}' | // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
1420 '\u{FF07}' // ' [FULLWIDTH APOSTROPHE]
1421 => Some("\'"),
1422 '\u{2010}' | // ‐ [HYPHEN]
1423 '\u{2011}' | // ‑ [NON-BREAKING HYPHEN]
1424 '\u{2012}' | // ‒ [FIGURE DASH]
1425 '\u{2013}' | // – [EN DASH]
1426 '\u{2014}' | // — [EM DASH]
1427 '\u{207B}' | // ⁻ [SUPERSCRIPT MINUS]
1428 '\u{208B}' | // ₋ [SUBSCRIPT MINUS]
1429 '\u{FF0D}' // - [FULLWIDTH HYPHEN-MINUS]
1430 => Some("-"),
1431 '\u{2045}' | // ⁅ [LEFT SQUARE BRACKET WITH QUILL]
1432 '\u{2772}' | // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
1433 '\u{FF3B}' // [ [FULLWIDTH LEFT SQUARE BRACKET]
1434 => Some("["),
1435 '\u{2046}' | // ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
1436 '\u{2773}' | // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
1437 '\u{FF3D}' // ] [FULLWIDTH RIGHT SQUARE BRACKET]
1438 => Some("]"),
1439 '\u{207D}' | // ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
1440 '\u{208D}' | // ₍ [SUBSCRIPT LEFT PARENTHESIS]
1441 '\u{2768}' | // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
1442 '\u{276A}' | // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
1443 '\u{FF08}' // ( [FULLWIDTH LEFT PARENTHESIS]
1444 => Some("("),
1445 '\u{2E28}' // ⸨ [LEFT DOUBLE PARENTHESIS]
1446 => Some("(("),
1447 '\u{207E}' | // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
1448 '\u{208E}' | // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
1449 '\u{2769}' | // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
1450 '\u{276B}' | // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
1451 '\u{FF09}' // ) [FULLWIDTH RIGHT PARENTHESIS]
1452 => Some(")"),
1453 '\u{2E29}' // ⸩ [RIGHT DOUBLE PARENTHESIS]
1454 => Some("))"),
1455 '\u{276C}' | // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
1456 '\u{2770}' | // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
1457 '\u{FF1C}' // < [FULLWIDTH LESS-THAN SIGN]
1458 => Some("<"),
1459 '\u{276D}' | // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1460 '\u{2771}' | // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1461 '\u{FF1E}' // > [FULLWIDTH GREATER-THAN SIGN]
1462 => Some(">"),
1463 '\u{2774}' | // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
1464 '\u{FF5B}' // { [FULLWIDTH LEFT CURLY BRACKET]
1465 => Some("{"),
1466 '\u{2775}' | // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
1467 '\u{FF5D}' // } [FULLWIDTH RIGHT CURLY BRACKET]
1468 => Some("}"),
1469 '\u{207A}' | // ⁺ [SUPERSCRIPT PLUS SIGN]
1470 '\u{208A}' | // ₊ [SUBSCRIPT PLUS SIGN]
1471 '\u{FF0B}' // + [FULLWIDTH PLUS SIGN]
1472 => Some("+"),
1473 '\u{207C}' | // ⁼ [SUPERSCRIPT EQUALS SIGN]
1474 '\u{208C}' | // ₌ [SUBSCRIPT EQUALS SIGN]
1475 '\u{FF1D}' // = [FULLWIDTH EQUALS SIGN]
1476 => Some("="),
1477 '\u{FF01}' // ! [FULLWIDTH EXCLAMATION MARK]
1478 => Some("!"),
1479 '\u{203C}' // ‼ [DOUBLE EXCLAMATION MARK]
1480 => Some("!!"),
1481 '\u{2049}' // ⁉ [EXCLAMATION QUESTION MARK]
1482 => Some("!?"),
1483 '\u{FF03}' // # [FULLWIDTH NUMBER SIGN]
1484 => Some("#"),
1485 '\u{FF04}' // $ [FULLWIDTH DOLLAR SIGN]
1486 => Some("$"),
1487 '\u{2052}' | // ⁒ [COMMERCIAL MINUS SIGN]
1488 '\u{FF05}' // % [FULLWIDTH PERCENT SIGN]
1489 => Some("%"),
1490 '\u{FF06}' // & [FULLWIDTH AMPERSAND]
1491 => Some("&"),
1492 '\u{204E}' | // ⁎ [LOW ASTERISK]
1493 '\u{FF0A}' // * [FULLWIDTH ASTERISK]
1494 => Some("*"),
1495 '\u{FF0C}' // , [FULLWIDTH COMMA]
1496 => Some(","),
1497 '\u{FF0E}' // . [FULLWIDTH FULL STOP]
1498 => Some("."),
1499 '\u{2044}' | // ⁄ [FRACTION SLASH]
1500 '\u{FF0F}' // / [FULLWIDTH SOLIDUS]
1501 => Some("/"),
1502 '\u{FF1A}' // : [FULLWIDTH COLON]
1503 => Some(":"),
1504 '\u{204F}' | // ⁏ [REVERSED SEMICOLON]
1505 '\u{FF1B}' // ; [FULLWIDTH SEMICOLON]
1506 => Some(";"),
1507 '\u{FF1F}' // ? [FULLWIDTH QUESTION MARK]
1508 => Some("?"),
1509 '\u{2047}' // ⁇ [DOUBLE QUESTION MARK]
1510 => Some("??"),
1511 '\u{2048}' // ⁈ [QUESTION EXCLAMATION MARK]
1512 => Some("?!"),
1513 '\u{FF20}' // @ [FULLWIDTH COMMERCIAL AT]
1514 => Some("@"),
1515 '\u{FF3C}' // \ [FULLWIDTH REVERSE SOLIDUS]
1516 => Some("\\"),
1517 '\u{2038}' | // ‸ [CARET]
1518 '\u{FF3E}' // ^ [FULLWIDTH CIRCUMFLEX ACCENT]
1519 => Some("^"),
1520 '\u{FF3F}' // _ [FULLWIDTH LOW LINE]
1521 => Some("_"),
1522 '\u{2053}' | // ⁓ [SWUNG DASH]
1523 '\u{FF5E}' // ~ [FULLWIDTH TILDE]
1524 => Some("~"),
1525 _ => None
1526 }
1527}
1528
1529// https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java#L187
1530fn to_ascii(text: &str, output: &mut String) {
1531 output.clear();
1532
1533 for c in text.chars() {
1534 if let Some(folded) = fold_non_ascii_char(c) {
1535 output.push_str(folded);
1536 } else {
1537 output.push(c);
1538 }
1539 }
1540}
1541
1542#[cfg(test)]
1543mod tests {
1544 use std::iter;
1545
1546 use super::to_ascii;
1547 use crate::tokenizer::{AsciiFoldingFilter, RawTokenizer, SimpleTokenizer, TextAnalyzer};
1548
1549 #[test]
1550 fn test_ascii_folding() {
1551 assert_eq!(&folding_helper("Ràmon"), &["Ramon"]);
1552 assert_eq!(&folding_helper("accentué"), &["accentue"]);
1553 assert_eq!(&folding_helper("âäàéè"), &["aaaee"]);
1554 }
1555
1556 #[test]
1557 fn test_no_change() {
1558 assert_eq!(&folding_helper("Usagi"), &["Usagi"]);
1559 }
1560
1561 fn folding_helper(text: &str) -> Vec<String> {
1562 let mut tokens = Vec::new();
1563 TextAnalyzer::from(SimpleTokenizer)
1564 .filter(AsciiFoldingFilter)
1565 .token_stream(text)
1566 .process(&mut |token| {
1567 tokens.push(token.text.clone());
1568 });
1569 tokens
1570 }
1571
1572 fn folding_using_raw_tokenizer_helper(text: &str) -> String {
1573 let mut token_stream = TextAnalyzer::from(RawTokenizer)
1574 .filter(AsciiFoldingFilter)
1575 .token_stream(text);
1576 token_stream.advance();
1577 token_stream.token().text.clone()
1578 }
1579
1580 #[test]
1581 fn test_latin1_characters() {
1582 let latin1_string = "Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ
1583 Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij
1584 ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl";
1585 let mut vec: Vec<&str> = vec!["Des", "mot", "cles", "A", "LA", "CHAINE"];
1586 vec.extend(iter::repeat("A").take(6));
1587 vec.extend(iter::repeat("AE").take(1));
1588 vec.extend(iter::repeat("C").take(1));
1589 vec.extend(iter::repeat("E").take(4));
1590 vec.extend(iter::repeat("I").take(4));
1591 vec.extend(iter::repeat("IJ").take(1));
1592 vec.extend(iter::repeat("D").take(1));
1593 vec.extend(iter::repeat("N").take(1));
1594 vec.extend(iter::repeat("O").take(6));
1595 vec.extend(iter::repeat("OE").take(1));
1596 vec.extend(iter::repeat("TH").take(1));
1597 vec.extend(iter::repeat("U").take(4));
1598 vec.extend(iter::repeat("Y").take(2));
1599 vec.extend(iter::repeat("a").take(6));
1600 vec.extend(iter::repeat("ae").take(1));
1601 vec.extend(iter::repeat("c").take(1));
1602 vec.extend(iter::repeat("e").take(4));
1603 vec.extend(iter::repeat("i").take(4));
1604 vec.extend(iter::repeat("ij").take(1));
1605 vec.extend(iter::repeat("d").take(1));
1606 vec.extend(iter::repeat("n").take(1));
1607 vec.extend(iter::repeat("o").take(6));
1608 vec.extend(iter::repeat("oe").take(1));
1609 vec.extend(iter::repeat("ss").take(1));
1610 vec.extend(iter::repeat("th").take(1));
1611 vec.extend(iter::repeat("u").take(4));
1612 vec.extend(iter::repeat("y").take(2));
1613 vec.extend(iter::repeat("fi").take(1));
1614 vec.extend(iter::repeat("fl").take(1));
1615 assert_eq!(folding_helper(latin1_string), vec);
1616 }
1617
1618 #[test]
1619 fn test_unmodified_letters() {
1620 assert_eq!(
1621 folding_using_raw_tokenizer_helper("§ ¦ ¤ END"),
1622 "§ ¦ ¤ END".to_string()
1623 );
1624 }
1625
1626 #[test]
1627 fn test_to_ascii() {
1628 let input = "Rámon".to_string();
1629 let mut buffer = String::new();
1630 to_ascii(&input, &mut buffer);
1631 assert_eq!("Ramon", buffer);
1632 }
1633
1634 #[test]
1635 fn test_all_foldings() {
        // These foldings are a copy of
1637 // https://github.com/apache/lucene-solr/blob/28d187acd1e391723eb6e1b5445f22abf5580a80/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
1638 // useful regex to adapt to a Rust structure:
1639 // 1. Preg and replace folded:
1640 // - **REGEX** |,"(.){3,5}", // Folded result|
1641 // - **REPLACEMENT** ], "$1".to_string(), ), ( vec![
1642 // 2. Preg and replace characters:
1643 // - **REGEX** |[\+]{0,1} "(.{1,3})" // U\+|
1644 // - **REPLACEMENT** "$1", // U+
1645 let foldings: Vec<(&[&str], &str)> = vec![
1646 (
1647 &[
1648 "À", // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE
1649 "Á", // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE
1650 "Â", // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
1651 "Ã", // U+00C3: LATIN CAPITAL LETTER A WITH TILDE
1652 "Ä", // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS
1653 "Å", // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE
1654 "Ā", // U+0100: LATIN CAPITAL LETTER A WITH MACRON
1655 "Ă", // U+0102: LATIN CAPITAL LETTER A WITH BREVE
1656 "Ą", // U+0104: LATIN CAPITAL LETTER A WITH OGONEK
1657 "Ə", // U+018F: LATIN CAPITAL LETTER SCHWA
1658 "Ǎ", // U+01CD: LATIN CAPITAL LETTER A WITH CARON
1659 "Ǟ", // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
1660 "Ǡ", // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
1661 "Ǻ", // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
1662 "Ȁ", // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
1663 "Ȃ", // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE
1664 "Ȧ", // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE
1665 "Ⱥ", // U+023A: LATIN CAPITAL LETTER A WITH STROKE
1666 "ᴀ", // U+1D00: LATIN LETTER SMALL CAPITAL A
1667 "Ḁ", // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW
1668 "Ạ", // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW
1669 "Ả", // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE
1670 "Ấ", // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1671 "Ầ", // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1672 "Ẩ", // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1673 "Ẫ", // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1674 "Ậ", // U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1675 "Ắ", // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1676 "Ằ", // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1677 "Ẳ", // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1678 "Ẵ", // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1679 "Ặ", // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1680 "Ⓐ", // U+24B6: CIRCLED LATIN CAPITAL LETTER A
1681 "A", // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A
1682 ],
1683 "A",
1684 ),
1685 (
1686 &[
1687 "à", // U+00E0: LATIN SMALL LETTER A WITH GRAVE
1688 "á", // U+00E1: LATIN SMALL LETTER A WITH ACUTE
1689 "â", // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
1690 "ã", // U+00E3: LATIN SMALL LETTER A WITH TILDE
1691 "ä", // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS
1692 "å", // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE
1693 "ā", // U+0101: LATIN SMALL LETTER A WITH MACRON
1694 "ă", // U+0103: LATIN SMALL LETTER A WITH BREVE
1695 "ą", // U+0105: LATIN SMALL LETTER A WITH OGONEK
1696 "ǎ", // U+01CE: LATIN SMALL LETTER A WITH CARON
1697 "ǟ", // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
1698 "ǡ", // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
1699 "ǻ", // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
1700 "ȁ", // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE
1701 "ȃ", // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE
1702 "ȧ", // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE
1703 "ɐ", // U+0250: LATIN SMALL LETTER TURNED A
1704 "ə", // U+0259: LATIN SMALL LETTER SCHWA
1705 "ɚ", // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK
1706 "ᶏ", // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK
1707 "ḁ", // U+1E01: LATIN SMALL LETTER A WITH RING BELOW
1708 "ᶕ", // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK
1709 "ẚ", // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING
1710 "ạ", // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW
1711 "ả", // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE
1712 "ấ", // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
1713 "ầ", // U+1EA7: LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
1714 "ẩ", // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1715 "ẫ", // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
1716 "ậ", // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1717 "ắ", // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE
1718 "ằ", // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE
1719 "ẳ", // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
1720 "ẵ", // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE
1721 "ặ", // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
1722 "ₐ", // U+2090: LATIN SUBSCRIPT SMALL LETTER A
1723 "ₔ", // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA
1724 "ⓐ", // U+24D0: CIRCLED LATIN SMALL LETTER A
1725 "ⱥ", // U+2C65: LATIN SMALL LETTER A WITH STROKE
1726 "Ɐ", // U+2C6F: LATIN CAPITAL LETTER TURNED A
1727 "a", // U+FF41: FULLWIDTH LATIN SMALL LETTER A
1728 ],
1729 "a",
1730 ),
1731 (
1732 &[
1733 "Ꜳ", // U+A732: LATIN CAPITAL LETTER AA
1734 ],
1735 "AA",
1736 ),
1737 (
1738 &[
1739 "Æ", // U+00C6: LATIN CAPITAL LETTER AE
1740 "Ǣ", // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON
1741 "Ǽ", // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE
1742 "ᴁ", // U+1D01: LATIN LETTER SMALL CAPITAL AE
1743 ],
1744 "AE",
1745 ),
1746 (
1747 &[
1748 "Ꜵ", // U+A734: LATIN CAPITAL LETTER AO
1749 ],
1750 "AO",
1751 ),
1752 (
1753 &[
1754 "Ꜷ", // U+A736: LATIN CAPITAL LETTER AU
1755 ],
1756 "AU",
1757 ),
1758 (
1759 &[
1760 "Ꜹ", // U+A738: LATIN CAPITAL LETTER AV
1761 "Ꜻ", // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
1762 ],
1763 "AV",
1764 ),
1765 (
1766 &[
1767 "Ꜽ", // U+A73C: LATIN CAPITAL LETTER AY
1768 ],
1769 "AY",
1770 ),
1771 (
1772 &[
1773 "⒜", // U+249C: PARENTHESIZED LATIN SMALL LETTER A
1774 ],
1775 "(a)",
1776 ),
1777 (
1778 &[
1779 "ꜳ", // U+A733: LATIN SMALL LETTER AA
1780 ],
1781 "aa",
1782 ),
1783 (
1784 &[
1785 "æ", // U+00E6: LATIN SMALL LETTER AE
1786 "ǣ", // U+01E3: LATIN SMALL LETTER AE WITH MACRON
1787 "ǽ", // U+01FD: LATIN SMALL LETTER AE WITH ACUTE
1788 "ᴂ", // U+1D02: LATIN SMALL LETTER TURNED AE
1789 ],
1790 "ae",
1791 ),
1792 (
1793 &[
1794 "ꜵ", // U+A735: LATIN SMALL LETTER AO
1795 ],
1796 "ao",
1797 ),
1798 (
1799 &[
1800 "ꜷ", // U+A737: LATIN SMALL LETTER AU
1801 ],
1802 "au",
1803 ),
1804 (
1805 &[
1806 "ꜹ", // U+A739: LATIN SMALL LETTER AV
1807 "ꜻ", // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR
1808 ],
1809 "av",
1810 ),
1811 (
1812 &[
1813 "ꜽ", // U+A73D: LATIN SMALL LETTER AY
1814 ],
1815 "ay",
1816 ),
1817 (
1818 &[
1819 "Ɓ", // U+0181: LATIN CAPITAL LETTER B WITH HOOK
1820 "Ƃ", // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR
1821 "Ƀ", // U+0243: LATIN CAPITAL LETTER B WITH STROKE
1822 "ʙ", // U+0299: LATIN LETTER SMALL CAPITAL B
1823 "ᴃ", // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B
1824 "Ḃ", // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE
1825 "Ḅ", // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW
1826 "Ḇ", // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW
1827 "Ⓑ", // U+24B7: CIRCLED LATIN CAPITAL LETTER B
1828 "B", // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B
1829 ],
1830 "B",
1831 ),
1832 (
1833 &[
1834 "ƀ", // U+0180: LATIN SMALL LETTER B WITH STROKE
1835 "ƃ", // U+0183: LATIN SMALL LETTER B WITH TOPBAR
1836 "ɓ", // U+0253: LATIN SMALL LETTER B WITH HOOK
1837 "ᵬ", // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE
1838 "ᶀ", // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK
1839 "ḃ", // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE
1840 "ḅ", // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW
1841 "ḇ", // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW
1842 "ⓑ", // U+24D1: CIRCLED LATIN SMALL LETTER B
1843 "b", // U+FF42: FULLWIDTH LATIN SMALL LETTER B
1844 ],
1845 "b",
1846 ),
1847 (
1848 &[
1849 "⒝", // U+249D: PARENTHESIZED LATIN SMALL LETTER B
1850 ],
1851 "(b)",
1852 ),
1853 (
1854 &[
1855 "Ç", // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA
1856 "Ć", // U+0106: LATIN CAPITAL LETTER C WITH ACUTE
1857 "Ĉ", // U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX
1858 "Ċ", // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE
1859 "Č", // U+010C: LATIN CAPITAL LETTER C WITH CARON
1860 "Ƈ", // U+0187: LATIN CAPITAL LETTER C WITH HOOK
1861 "Ȼ", // U+023B: LATIN CAPITAL LETTER C WITH STROKE
1862 "ʗ", // U+0297: LATIN LETTER STRETCHED C
1863 "ᴄ", // U+1D04: LATIN LETTER SMALL CAPITAL C
1864 "Ḉ", // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1865 "Ⓒ", // U+24B8: CIRCLED LATIN CAPITAL LETTER C
1866 "C", // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C
1867 ],
1868 "C",
1869 ),
1870 (
1871 &[
1872 "ç", // U+00E7: LATIN SMALL LETTER C WITH CEDILLA
1873 "ć", // U+0107: LATIN SMALL LETTER C WITH ACUTE
1874 "ĉ", // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX
1875 "ċ", // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE
1876 "č", // U+010D: LATIN SMALL LETTER C WITH CARON
1877 "ƈ", // U+0188: LATIN SMALL LETTER C WITH HOOK
1878 "ȼ", // U+023C: LATIN SMALL LETTER C WITH STROKE
1879 "ɕ", // U+0255: LATIN SMALL LETTER C WITH CURL
1880 "ḉ", // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
1881 "ↄ", // U+2184: LATIN SMALL LETTER REVERSED C
1882 "ⓒ", // U+24D2: CIRCLED LATIN SMALL LETTER C
1883 "Ꜿ", // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT
1884 "ꜿ", // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT
1885 "c", // U+FF43: FULLWIDTH LATIN SMALL LETTER C
1886 ],
1887 "c",
1888 ),
1889 (
1890 &[
1891 "⒞", // U+249E: PARENTHESIZED LATIN SMALL LETTER C
1892 ],
1893 "(c)",
1894 ),
1895 (
1896 &[
1897 "Ð", // U+00D0: LATIN CAPITAL LETTER ETH
1898 "Ď", // U+010E: LATIN CAPITAL LETTER D WITH CARON
1899 "Đ", // U+0110: LATIN CAPITAL LETTER D WITH STROKE
1900 "Ɖ", // U+0189: LATIN CAPITAL LETTER AFRICAN D
1901 "Ɗ", // U+018A: LATIN CAPITAL LETTER D WITH HOOK
1902 "Ƌ", // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR
1903 "ᴅ", // U+1D05: LATIN LETTER SMALL CAPITAL D
1904 "ᴆ", // U+1D06: LATIN LETTER SMALL CAPITAL ETH
1905 "Ḋ", // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE
1906 "Ḍ", // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW
1907 "Ḏ", // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW
1908 "Ḑ", // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA
1909 "Ḓ", // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1910 "Ⓓ", // U+24B9: CIRCLED LATIN CAPITAL LETTER D
1911 "Ꝺ", // U+A779: LATIN CAPITAL LETTER INSULAR D
1912 "D", // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D
1913 ],
1914 "D",
1915 ),
1916 (
1917 &[
1918 "ð", // U+00F0: LATIN SMALL LETTER ETH
1919 "ď", // U+010F: LATIN SMALL LETTER D WITH CARON
1920 "đ", // U+0111: LATIN SMALL LETTER D WITH STROKE
1921 "ƌ", // U+018C: LATIN SMALL LETTER D WITH TOPBAR
1922 "ȡ", // U+0221: LATIN SMALL LETTER D WITH CURL
1923 "ɖ", // U+0256: LATIN SMALL LETTER D WITH TAIL
1924 "ɗ", // U+0257: LATIN SMALL LETTER D WITH HOOK
1925 "ᵭ", // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE
1926 "ᶁ", // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK
1927 "ᶑ", // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL
1928 "ḋ", // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE
1929 "ḍ", // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW
1930 "ḏ", // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW
1931 "ḑ", // U+1E11: LATIN SMALL LETTER D WITH CEDILLA
1932 "ḓ", // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
1933 "ⓓ", // U+24D3: CIRCLED LATIN SMALL LETTER D
1934 "ꝺ", // U+A77A: LATIN SMALL LETTER INSULAR D
1935 "d", // U+FF44: FULLWIDTH LATIN SMALL LETTER D
1936 ],
1937 "d",
1938 ),
1939 (
1940 &[
1941 "DŽ", // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON
1942 "DZ", // U+01F1: LATIN CAPITAL LETTER DZ
1943 ],
1944 "DZ",
1945 ),
1946 (
1947 &[
1948 "Dž", // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
1949 "Dz", // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z
1950 ],
1951 "Dz",
1952 ),
1953 (
1954 &[
1955 "⒟", // U+249F: PARENTHESIZED LATIN SMALL LETTER D
1956 ],
1957 "(d)",
1958 ),
1959 (
1960 &[
1961 "ȸ", // U+0238: LATIN SMALL LETTER DB DIGRAPH
1962 ],
1963 "db",
1964 ),
1965 (
1966 &[
1967 "dž", // U+01C6: LATIN SMALL LETTER DZ WITH CARON
1968 "dz", // U+01F3: LATIN SMALL LETTER DZ
1969 "ʣ", // U+02A3: LATIN SMALL LETTER DZ DIGRAPH
1970 "ʥ", // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL
1971 ],
1972 "dz",
1973 ),
1974 (
1975 &[
1976 "È", // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE
1977 "É", // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE
1978 "Ê", // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
1979 "Ë", // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS
1980 "Ē", // U+0112: LATIN CAPITAL LETTER E WITH MACRON
1981 "Ĕ", // U+0114: LATIN CAPITAL LETTER E WITH BREVE
1982 "Ė", // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE
1983 "Ę", // U+0118: LATIN CAPITAL LETTER E WITH OGONEK
1984 "Ě", // U+011A: LATIN CAPITAL LETTER E WITH CARON
1985 "Ǝ", // U+018E: LATIN CAPITAL LETTER REVERSED E
1986 "Ɛ", // U+0190: LATIN CAPITAL LETTER OPEN E
1987 "Ȅ", // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
1988 "Ȇ", // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE
1989 "Ȩ", // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA
1990 "Ɇ", // U+0246: LATIN CAPITAL LETTER E WITH STROKE
1991 "ᴇ", // U+1D07: LATIN LETTER SMALL CAPITAL E
1992 "Ḕ", // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1993 "Ḗ", // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1994 "Ḙ", // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1995 "Ḛ", // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW
1996 "Ḝ", // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1997 "Ẹ", // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW
1998 "Ẻ", // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE
1999 "Ẽ", // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE
2000 "Ế", // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
2001 "Ề", // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
2002 "Ể", // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
2003 "Ễ", // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
2004 "Ệ", // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
2005 "Ⓔ", // U+24BA: CIRCLED LATIN CAPITAL LETTER E
2006 "ⱻ", // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E
2007 "E", // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E
2008 ],
2009 "E",
2010 ),
2011 (
2012 &[
2013 "è", // U+00E8: LATIN SMALL LETTER E WITH GRAVE
2014 "é", // U+00E9: LATIN SMALL LETTER E WITH ACUTE
2015 "ê", // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX
2016 "ë", // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS
2017 "ē", // U+0113: LATIN SMALL LETTER E WITH MACRON
2018 "ĕ", // U+0115: LATIN SMALL LETTER E WITH BREVE
2019 "ė", // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE
2020 "ę", // U+0119: LATIN SMALL LETTER E WITH OGONEK
2021 "ě", // U+011B: LATIN SMALL LETTER E WITH CARON
2022 "ǝ", // U+01DD: LATIN SMALL LETTER TURNED E
2023 "ȅ", // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE
2024 "ȇ", // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE
2025 "ȩ", // U+0229: LATIN SMALL LETTER E WITH CEDILLA
2026 "ɇ", // U+0247: LATIN SMALL LETTER E WITH STROKE
2027 "ɘ", // U+0258: LATIN SMALL LETTER REVERSED E
2028 "ɛ", // U+025B: LATIN SMALL LETTER OPEN E
2029 "ɜ", // U+025C: LATIN SMALL LETTER REVERSED OPEN E
2030 "ɝ", // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
2031 "ɞ", // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E
2032 "ʚ", // U+029A: LATIN SMALL LETTER CLOSED OPEN E
2033 "ᴈ", // U+1D08: LATIN SMALL LETTER TURNED OPEN E
2034 "ᶒ", // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK
2035 "ᶓ", // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK
2036 "ᶔ", // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK
2037 "ḕ", // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE
2038 "ḗ", // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE
2039 "ḙ", // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
2040 "ḛ", // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW
2041 "ḝ", // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE
2042 "ẹ", // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW
2043 "ẻ", // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE
2044 "ẽ", // U+1EBD: LATIN SMALL LETTER E WITH TILDE
2045 "ế", // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
2046 "ề", // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
2047 "ể", // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
2048 "ễ", // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
2049 "ệ", // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
2050 "ₑ", // U+2091: LATIN SUBSCRIPT SMALL LETTER E
2051 "ⓔ", // U+24D4: CIRCLED LATIN SMALL LETTER E
2052 "ⱸ", // U+2C78: LATIN SMALL LETTER E WITH NOTCH
2053 "e", // U+FF45: FULLWIDTH LATIN SMALL LETTER E
2054 ],
2055 "e",
2056 ),
2057 (
2058 &[
2059 "⒠", // U+24A0: PARENTHESIZED LATIN SMALL LETTER E
2060 ],
2061 "(e)",
2062 ),
2063 (
2064 &[
2065 "Ƒ", // U+0191: LATIN CAPITAL LETTER F WITH HOOK
2066 "Ḟ", // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE
2067 "Ⓕ", // U+24BB: CIRCLED LATIN CAPITAL LETTER F
2068 "ꜰ", // U+A730: LATIN LETTER SMALL CAPITAL F
2069 "Ꝼ", // U+A77B: LATIN CAPITAL LETTER INSULAR F
2070 "ꟻ", // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F
2071 "F", // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F
2072 ],
2073 "F",
2074 ),
2075 (
2076 &[
2077 "ƒ", // U+0192: LATIN SMALL LETTER F WITH HOOK
2078 "ᵮ", // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE
2079 "ᶂ", // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK
2080 "ḟ", // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE
2081 "ẛ", // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE
2082 "ⓕ", // U+24D5: CIRCLED LATIN SMALL LETTER F
2083 "ꝼ", // U+A77C: LATIN SMALL LETTER INSULAR F
2084 "f", // U+FF46: FULLWIDTH LATIN SMALL LETTER F
2085 ],
2086 "f",
2087 ),
2088 (
2089 &[
2090 "⒡", // U+24A1: PARENTHESIZED LATIN SMALL LETTER F
2091 ],
2092 "(f)",
2093 ),
2094 (
2095 &[
2096 "ff", // U+FB00: LATIN SMALL LIGATURE FF
2097 ],
2098 "ff",
2099 ),
2100 (
2101 &[
2102 "ffi", // U+FB03: LATIN SMALL LIGATURE FFI
2103 ],
2104 "ffi",
2105 ),
2106 (
2107 &[
2108 "ffl", // U+FB04: LATIN SMALL LIGATURE FFL
2109 ],
2110 "ffl",
2111 ),
2112 (
2113 &[
2114 "fi", // U+FB01: LATIN SMALL LIGATURE FI
2115 ],
2116 "fi",
2117 ),
2118 (
2119 &[
2120 "fl", // U+FB02: LATIN SMALL LIGATURE FL
2121 ],
2122 "fl",
2123 ),
2124 (
2125 &[
2126 "Ĝ", // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX
2127 "Ğ", // U+011E: LATIN CAPITAL LETTER G WITH BREVE
2128 "Ġ", // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE
2129 "Ģ", // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA
2130 "Ɠ", // U+0193: LATIN CAPITAL LETTER G WITH HOOK
2131 "Ǥ", // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
2132 "ǥ", // U+01E5: LATIN SMALL LETTER G WITH STROKE
2133 "Ǧ", // U+01E6: LATIN CAPITAL LETTER G WITH CARON
2134 "ǧ", // U+01E7: LATIN SMALL LETTER G WITH CARON
2135 "Ǵ", // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE
2136 "ɢ", // U+0262: LATIN LETTER SMALL CAPITAL G
2137 "ʛ", // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK
2138 "Ḡ", // U+1E20: LATIN CAPITAL LETTER G WITH MACRON
2139 "Ⓖ", // U+24BC: CIRCLED LATIN CAPITAL LETTER G
2140 "Ᵹ", // U+A77D: LATIN CAPITAL LETTER INSULAR G
2141 "Ꝿ", // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G
2142 "G", // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G
2143 ],
2144 "G",
2145 ),
2146 (
2147 &[
2148 "ĝ", // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX
2149 "ğ", // U+011F: LATIN SMALL LETTER G WITH BREVE
2150 "ġ", // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE
2151 "ģ", // U+0123: LATIN SMALL LETTER G WITH CEDILLA
2152 "ǵ", // U+01F5: LATIN SMALL LETTER G WITH ACUTE
2153 "ɠ", // U+0260: LATIN SMALL LETTER G WITH HOOK
2154 "ɡ", // U+0261: LATIN SMALL LETTER SCRIPT G
2155 "ᵷ", // U+1D77: LATIN SMALL LETTER TURNED G
2156 "ᵹ", // U+1D79: LATIN SMALL LETTER INSULAR G
2157 "ᶃ", // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK
2158 "ḡ", // U+1E21: LATIN SMALL LETTER G WITH MACRON
2159 "ⓖ", // U+24D6: CIRCLED LATIN SMALL LETTER G
2160 "ꝿ", // U+A77F: LATIN SMALL LETTER TURNED INSULAR G
2161 "g", // U+FF47: FULLWIDTH LATIN SMALL LETTER G
2162 ],
2163 "g",
2164 ),
2165 (
2166 &[
2167 "⒢", // U+24A2: PARENTHESIZED LATIN SMALL LETTER G
2168 ],
2169 "(g)",
2170 ),
2171 (
2172 &[
2173 "Ĥ", // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX
2174 "Ħ", // U+0126: LATIN CAPITAL LETTER H WITH STROKE
2175 "Ȟ", // U+021E: LATIN CAPITAL LETTER H WITH CARON
2176 "ʜ", // U+029C: LATIN LETTER SMALL CAPITAL H
2177 "Ḣ", // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE
2178 "Ḥ", // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW
2179 "Ḧ", // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS
2180 "Ḩ", // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA
2181 "Ḫ", // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW
2182 "Ⓗ", // U+24BD: CIRCLED LATIN CAPITAL LETTER H
2183 "Ⱨ", // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER
2184 "Ⱶ", // U+2C75: LATIN CAPITAL LETTER HALF H
2185 "H", // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H
2186 ],
2187 "H",
2188 ),
2189 (
2190 &[
2191 "ĥ", // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX
2192 "ħ", // U+0127: LATIN SMALL LETTER H WITH STROKE
2193 "ȟ", // U+021F: LATIN SMALL LETTER H WITH CARON
2194 "ɥ", // U+0265: LATIN SMALL LETTER TURNED H
2195 "ɦ", // U+0266: LATIN SMALL LETTER H WITH HOOK
2196 "ʮ", // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK
2197 "ʯ", // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
2198 "ḣ", // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE
2199 "ḥ", // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW
2200 "ḧ", // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS
2201 "ḩ", // U+1E29: LATIN SMALL LETTER H WITH CEDILLA
2202 "ḫ", // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW
2203 "ẖ", // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW
2204 "ⓗ", // U+24D7: CIRCLED LATIN SMALL LETTER H
2205 "ⱨ", // U+2C68: LATIN SMALL LETTER H WITH DESCENDER
2206 "ⱶ", // U+2C76: LATIN SMALL LETTER HALF H
2207 "h", // U+FF48: FULLWIDTH LATIN SMALL LETTER H
2208 ],
2209 "h",
2210 ),
2211 (
2212 &[
2213 "Ƕ", // U+01F6: LATIN CAPITAL LETTER HWAIR
2214 ],
2215 "HV",
2216 ),
2217 (
2218 &[
2219 "⒣", // U+24A3: PARENTHESIZED LATIN SMALL LETTER H
2220 ],
2221 "(h)",
2222 ),
2223 (
2224 &[
2225 "ƕ", // U+0195: LATIN SMALL LETTER HV
2226 ],
2227 "hv",
2228 ),
2229 (
2230 &[
2231 "Ì", // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE
2232 "Í", // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE
2233 "Î", // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
2234 "Ï", // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS
2235 "Ĩ", // U+0128: LATIN CAPITAL LETTER I WITH TILDE
2236 "Ī", // U+012A: LATIN CAPITAL LETTER I WITH MACRON
2237 "Ĭ", // U+012C: LATIN CAPITAL LETTER I WITH BREVE
2238 "Į", // U+012E: LATIN CAPITAL LETTER I WITH OGONEK
2239 "İ", // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE
2240 "Ɩ", // U+0196: LATIN CAPITAL LETTER IOTA
2241 "Ɨ", // U+0197: LATIN CAPITAL LETTER I WITH STROKE
2242 "Ǐ", // U+01CF: LATIN CAPITAL LETTER I WITH CARON
2243 "Ȉ", // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
2244 "Ȋ", // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE
2245 "ɪ", // U+026A: LATIN LETTER SMALL CAPITAL I
2246 "ᵻ", // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE
2247 "Ḭ", // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW
2248 "Ḯ", // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
2249 "Ỉ", // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK ABOVE
2250 "Ị", // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW
2251 "Ⓘ", // U+24BE: CIRCLED LATIN CAPITAL LETTER I
2252 "ꟾ", // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA
2253 "I", // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I
2254 ],
2255 "I",
2256 ),
2257 (
2258 &[
2259 "ì", // U+00EC: LATIN SMALL LETTER I WITH GRAVE
2260 "í", // U+00ED: LATIN SMALL LETTER I WITH ACUTE
2261 "î", // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
2262 "ï", // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS
2263 "ĩ", // U+0129: LATIN SMALL LETTER I WITH TILDE
2264 "ī", // U+012B: LATIN SMALL LETTER I WITH MACRON
2265 "ĭ", // U+012D: LATIN SMALL LETTER I WITH BREVE
2266 "į", // U+012F: LATIN SMALL LETTER I WITH OGONEK
2267 "ı", // U+0131: LATIN SMALL LETTER DOTLESS I
2268 "ǐ", // U+01D0: LATIN SMALL LETTER I WITH CARON
2269 "ȉ", // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE
2270 "ȋ", // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE
2271 "ɨ", // U+0268: LATIN SMALL LETTER I WITH STROKE
2272 "ᴉ", // U+1D09: LATIN SMALL LETTER TURNED I
2273 "ᵢ", // U+1D62: LATIN SUBSCRIPT SMALL LETTER I
2274 "ᵼ", // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE
2275 "ᶖ", // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK
2276 "ḭ", // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW
2277 "ḯ", // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
2278 "ỉ", // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE
2279 "ị", // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW
2280 "ⁱ", // U+2071: SUPERSCRIPT LATIN SMALL LETTER I
2281 "ⓘ", // U+24D8: CIRCLED LATIN SMALL LETTER I
2282 "i", // U+FF49: FULLWIDTH LATIN SMALL LETTER I
2283 ],
2284 "i",
2285 ),
2286 (
2287 &[
2288 "IJ", // U+0132: LATIN CAPITAL LIGATURE IJ
2289 ],
2290 "IJ",
2291 ),
2292 (
2293 &[
2294 "⒤", // U+24A4: PARENTHESIZED LATIN SMALL LETTER I
2295 ],
2296 "(i)",
2297 ),
2298 (
2299 &[
2300 "ij", // U+0133: LATIN SMALL LIGATURE IJ
2301 ],
2302 "ij",
2303 ),
2304 (
2305 &[
2306 "Ĵ", // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX
2307 "Ɉ", // U+0248: LATIN CAPITAL LETTER J WITH STROKE
2308 "ᴊ", // U+1D0A: LATIN LETTER SMALL CAPITAL J
2309 "Ⓙ", // U+24BF: CIRCLED LATIN CAPITAL LETTER J
2310 "J", // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J
2311 ],
2312 "J",
2313 ),
2314 (
2315 &[
2316 "ĵ", // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX
2317 "ǰ", // U+01F0: LATIN SMALL LETTER J WITH CARON
2318 "ȷ", // U+0237: LATIN SMALL LETTER DOTLESS J
2319 "ɉ", // U+0249: LATIN SMALL LETTER J WITH STROKE
2320 "ɟ", // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE
2321 "ʄ", // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
2322 "ʝ", // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL
2323 "ⓙ", // U+24D9: CIRCLED LATIN SMALL LETTER J
2324 "ⱼ", // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J
2325 "j", // U+FF4A: FULLWIDTH LATIN SMALL LETTER J
2326 ],
2327 "j",
2328 ),
2329 (
2330 &[
2331 "⒥", // U+24A5: PARENTHESIZED LATIN SMALL LETTER J
2332 ],
2333 "(j)",
2334 ),
2335 (
2336 &[
2337 "Ķ", // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA
2338 "Ƙ", // U+0198: LATIN CAPITAL LETTER K WITH HOOK
2339 "Ǩ", // U+01E8: LATIN CAPITAL LETTER K WITH CARON
2340 "ᴋ", // U+1D0B: LATIN LETTER SMALL CAPITAL K
2341 "Ḱ", // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE
2342 "Ḳ", // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW
2343 "Ḵ", // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW
2344 "Ⓚ", // U+24C0: CIRCLED LATIN CAPITAL LETTER K
2345 "Ⱪ", // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER
2346 "Ꝁ", // U+A740: LATIN CAPITAL LETTER K WITH STROKE
2347 "Ꝃ", // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
2348 "Ꝅ", // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
2349 "K", // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K
2350 ],
2351 "K",
2352 ),
2353 (
2354 &[
2355 "ķ", // U+0137: LATIN SMALL LETTER K WITH CEDILLA
2356 "ƙ", // U+0199: LATIN SMALL LETTER K WITH HOOK
2357 "ǩ", // U+01E9: LATIN SMALL LETTER K WITH CARON
2358 "ʞ", // U+029E: LATIN SMALL LETTER TURNED K
2359 "ᶄ", // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK
2360 "ḱ", // U+1E31: LATIN SMALL LETTER K WITH ACUTE
2361 "ḳ", // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW
2362 "ḵ", // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW
2363 "ⓚ", // U+24DA: CIRCLED LATIN SMALL LETTER K
2364 "ⱪ", // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER
2365 "ꝁ", // U+A741: LATIN SMALL LETTER K WITH STROKE
2366 "ꝃ", // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE
2367 "ꝅ", // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
2368 "k", // U+FF4B: FULLWIDTH LATIN SMALL LETTER K
2369 ],
2370 "k",
2371 ),
2372 (
2373 &[
2374 "⒦", // U+24A6: PARENTHESIZED LATIN SMALL LETTER K
2375 ],
2376 "(k)",
2377 ),
2378 (
2379 &[
2380 "Ĺ", // U+0139: LATIN CAPITAL LETTER L WITH ACUTE
2381 "Ļ", // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA
2382 "Ľ", // U+013D: LATIN CAPITAL LETTER L WITH CARON
2383 "Ŀ", // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT
2384 "Ł", // U+0141: LATIN CAPITAL LETTER L WITH STROKE
2385 "Ƚ", // U+023D: LATIN CAPITAL LETTER L WITH BAR
2386 "ʟ", // U+029F: LATIN LETTER SMALL CAPITAL L
2387 "ᴌ", // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE
2388 "Ḷ", // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW
2389 "Ḹ", // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
2390 "Ḻ", // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW
2391 "Ḽ", // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
2392 "Ⓛ", // U+24C1: CIRCLED LATIN CAPITAL LETTER L
2393 "Ⱡ", // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR
2394 "Ɫ", // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE
2395 "Ꝇ", // U+A746: LATIN CAPITAL LETTER BROKEN L
2396 "Ꝉ", // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE
2397 "Ꞁ", // U+A780: LATIN CAPITAL LETTER TURNED L
2398 "L", // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L
2399 ],
2400 "L",
2401 ),
2402 (
2403 &[
2404 "ĺ", // U+013A: LATIN SMALL LETTER L WITH ACUTE
2405 "ļ", // U+013C: LATIN SMALL LETTER L WITH CEDILLA
2406 "ľ", // U+013E: LATIN SMALL LETTER L WITH CARON
2407 "ŀ", // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT
2408 "ł", // U+0142: LATIN SMALL LETTER L WITH STROKE
2409 "ƚ", // U+019A: LATIN SMALL LETTER L WITH BAR
2410 "ȴ", // U+0234: LATIN SMALL LETTER L WITH CURL
2411 "ɫ", // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE
2412 "ɬ", // U+026C: LATIN SMALL LETTER L WITH BELT
2413 "ɭ", // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK
2414 "ᶅ", // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK
2415 "ḷ", // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW
2416 "ḹ", // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
2417 "ḻ", // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW
2418 "ḽ", // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
2419 "ⓛ", // U+24DB: CIRCLED LATIN SMALL LETTER L
2420 "ⱡ", // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR
2421 "ꝇ", // U+A747: LATIN SMALL LETTER BROKEN L
2422 "ꝉ", // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE
2423 "ꞁ", // U+A781: LATIN SMALL LETTER TURNED L
2424 "l", // U+FF4C: FULLWIDTH LATIN SMALL LETTER L
2425 ],
2426 "l",
2427 ),
2428 (
2429 &[
2430 "LJ", // U+01C7: LATIN CAPITAL LETTER LJ
2431 ],
2432 "LJ",
2433 ),
2434 (
2435 &[
2436 "Ỻ", // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL
2437 ],
2438 "LL",
2439 ),
2440 (
2441 &[
2442 "Lj", // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J
2443 ],
2444 "Lj",
2445 ),
2446 (
2447 &[
2448 "⒧", // U+24A7: PARENTHESIZED LATIN SMALL LETTER L
2449 ],
2450 "(l)",
2451 ),
2452 (
2453 &[
2454 "lj", // U+01C9: LATIN SMALL LETTER LJ
2455 ],
2456 "lj",
2457 ),
2458 (
2459 &[
2460 "ỻ", // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL
2461 ],
2462 "ll",
2463 ),
2464 (
2465 &[
2466 "ʪ", // U+02AA: LATIN SMALL LETTER LS DIGRAPH
2467 ],
2468 "ls",
2469 ),
2470 (
2471 &[
2472 "ʫ", // U+02AB: LATIN SMALL LETTER LZ DIGRAPH
2473 ],
2474 "lz",
2475 ),
2476 (
2477 &[
2478 "Ɯ", // U+019C: LATIN CAPITAL LETTER TURNED M
2479 "ᴍ", // U+1D0D: LATIN LETTER SMALL CAPITAL M
2480 "Ḿ", // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE
2481 "Ṁ", // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE
2482 "Ṃ", // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW
2483 "Ⓜ", // U+24C2: CIRCLED LATIN CAPITAL LETTER M
2484 "Ɱ", // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK
2485 "ꟽ", // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M
2486 "ꟿ", // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M
2487 "M", // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M
2488 ],
2489 "M",
2490 ),
2491 (
2492 &[
2493 "ɯ", // U+026F: LATIN SMALL LETTER TURNED M
2494 "ɰ", // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG
2495 "ɱ", // U+0271: LATIN SMALL LETTER M WITH HOOK
2496 "ᵯ", // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE
2497 "ᶆ", // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK
2498 "ḿ", // U+1E3F: LATIN SMALL LETTER M WITH ACUTE
2499 "ṁ", // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE
2500 "ṃ", // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW
2501 "ⓜ", // U+24DC: CIRCLED LATIN SMALL LETTER M
2502 "m", // U+FF4D: FULLWIDTH LATIN SMALL LETTER M
2503 ],
2504 "m",
2505 ),
2506 (
2507 &[
2508 "⒨", // U+24A8: PARENTHESIZED LATIN SMALL LETTER M
2509 ],
2510 "(m)",
2511 ),
2512 (
2513 &[
2514 "Ñ", // U+00D1: LATIN CAPITAL LETTER N WITH TILDE
2515 "Ń", // U+0143: LATIN CAPITAL LETTER N WITH ACUTE
2516 "Ņ", // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA
2517 "Ň", // U+0147: LATIN CAPITAL LETTER N WITH CARON
2518 "Ŋ", // U+014A: LATIN CAPITAL LETTER ENG
2519 "Ɲ", // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK
2520 "Ǹ", // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE
2521 "Ƞ", // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
2522 "ɴ", // U+0274: LATIN LETTER SMALL CAPITAL N
2523 "ᴎ", // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N
2524 "Ṅ", // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE
2525 "Ṇ", // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW
2526 "Ṉ", // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW
2527 "Ṋ", // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
2528 "Ⓝ", // U+24C3: CIRCLED LATIN CAPITAL LETTER N
2529 "N", // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N
2530 ],
2531 "N",
2532 ),
2533 (
2534 &[
2535 "ñ", // U+00F1: LATIN SMALL LETTER N WITH TILDE
2536 "ń", // U+0144: LATIN SMALL LETTER N WITH ACUTE
2537 "ņ", // U+0146: LATIN SMALL LETTER N WITH CEDILLA
2538 "ň", // U+0148: LATIN SMALL LETTER N WITH CARON
2539 "ʼn", // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
2540 "ŋ", // U+014B: LATIN SMALL LETTER ENG
2541 "ƞ", // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG
2542 "ǹ", // U+01F9: LATIN SMALL LETTER N WITH GRAVE
2543 "ȵ", // U+0235: LATIN SMALL LETTER N WITH CURL
2544 "ɲ", // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK
2545 "ɳ", // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK
2546 "ᵰ", // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE
2547 "ᶇ", // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK
2548 "ṅ", // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE
2549 "ṇ", // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW
2550 "ṉ", // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW
2551 "ṋ", // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
2552 "ⁿ", // U+207F: SUPERSCRIPT LATIN SMALL LETTER N
2553 "ⓝ", // U+24DD: CIRCLED LATIN SMALL LETTER N
2554 "n", // U+FF4E: FULLWIDTH LATIN SMALL LETTER N
2555 ],
2556 "n",
2557 ),
2558 (
2559 &[
2560 "NJ", // U+01CA: LATIN CAPITAL LETTER NJ
2561 ],
2562 "NJ",
2563 ),
2564 (
2565 &[
2566 "Nj", // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J
2567 ],
2568 "Nj",
2569 ),
2570 (
2571 &[
2572 "⒩", // U+24A9: PARENTHESIZED LATIN SMALL LETTER N
2573 ],
2574 "(n)",
2575 ),
2576 (
2577 &[
2578 "nj", // U+01CC: LATIN SMALL LETTER NJ
2579 ],
2580 "nj",
2581 ),
2582 (
2583 &[
2584 "Ò", // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE
2585 "Ó", // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE
2586 "Ô", // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
2587 "Õ", // U+00D5: LATIN CAPITAL LETTER O WITH TILDE
2588 "Ö", // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS
2589 "Ø", // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
2590 "Ō", // U+014C: LATIN CAPITAL LETTER O WITH MACRON
2591 "Ŏ", // U+014E: LATIN CAPITAL LETTER O WITH BREVE
2592 "Ő", // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
2593 "Ɔ", // U+0186: LATIN CAPITAL LETTER OPEN O
2594 "Ɵ", // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE
2595 "Ơ", // U+01A0: LATIN CAPITAL LETTER O WITH HORN
2596 "Ǒ", // U+01D1: LATIN CAPITAL LETTER O WITH CARON
2597 "Ǫ", // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK
2598 "Ǭ", // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
2599 "Ǿ", // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
2600 "Ȍ", // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
2601 "Ȏ", // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE
2602 "Ȫ", // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
2603 "Ȭ", // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON
2604 "Ȯ", // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE
2605 "Ȱ", // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
2606 "ᴏ", // U+1D0F: LATIN LETTER SMALL CAPITAL O
2607 "ᴐ", // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O
2608 "Ṍ", // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
2609 "Ṏ", // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
2610 "Ṑ", // U+1E50: LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
2611 "Ṓ", // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
2612 "Ọ", // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW
2613 "Ỏ", // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE
2614 "Ố", // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
2615 "Ồ", // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
2616 "Ổ", // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
2617 "Ỗ", // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
2618 "Ộ", // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
2619 "Ớ", // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE
2620 "Ờ", // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE
2621 "Ở", // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
2622 "Ỡ", // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE
2623 "Ợ", // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
2624 "Ⓞ", // U+24C4: CIRCLED LATIN CAPITAL LETTER O
2625 "Ꝋ", // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
2626 "Ꝍ", // U+A74C: LATIN CAPITAL LETTER O WITH LOOP
2627 "O", // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O
2628 ],
2629 "O",
2630 ),
2631 (
2632 &[
2633 "ò", // U+00F2: LATIN SMALL LETTER O WITH GRAVE
2634 "ó", // U+00F3: LATIN SMALL LETTER O WITH ACUTE
2635 "ô", // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
2636 "õ", // U+00F5: LATIN SMALL LETTER O WITH TILDE
2637 "ö", // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS
2638 "ø", // U+00F8: LATIN SMALL LETTER O WITH STROKE
2639 "ō", // U+014D: LATIN SMALL LETTER O WITH MACRON
2640 "ŏ", // U+014F: LATIN SMALL LETTER O WITH BREVE
2641 "ő", // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE
2642 "ơ", // U+01A1: LATIN SMALL LETTER O WITH HORN
2643 "ǒ", // U+01D2: LATIN SMALL LETTER O WITH CARON
2644 "ǫ", // U+01EB: LATIN SMALL LETTER O WITH OGONEK
2645 "ǭ", // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON
2646 "ǿ", // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE
2647 "ȍ", // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE
2648 "ȏ", // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE
2649 "ȫ", // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
2650 "ȭ", // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON
2651 "ȯ", // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE
2652 "ȱ", // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
2653 "ɔ", // U+0254: LATIN SMALL LETTER OPEN O
2654 "ɵ", // U+0275: LATIN SMALL LETTER BARRED O
2655 "ᴖ", // U+1D16: LATIN SMALL LETTER TOP HALF O
2656 "ᴗ", // U+1D17: LATIN SMALL LETTER BOTTOM HALF O
2657 "ᶗ", // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK
2658 "ṍ", // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE
2659 "ṏ", // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
2660 "ṑ", // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE
2661 "ṓ", // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE
2662 "ọ", // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW
2663 "ỏ", // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE
2664 "ố", // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
2665 "ồ", // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
2666 "ổ", // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
2667 "ỗ", // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
2668 "ộ", // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
2669 "ớ", // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE
2670 "ờ", // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE
2671 "ở", // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
2672 "ỡ", // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE
2673 "ợ", // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW
2674 "ₒ", // U+2092: LATIN SUBSCRIPT SMALL LETTER O
2675 "ⓞ", // U+24DE: CIRCLED LATIN SMALL LETTER O
2676 "ⱺ", // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE
2677 "ꝋ", // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
2678 "ꝍ", // U+A74D: LATIN SMALL LETTER O WITH LOOP
2679 "o", // U+FF4F: FULLWIDTH LATIN SMALL LETTER O
2680 ],
2681 "o",
2682 ),
2683 (
2684 &[
2685 "Œ", // U+0152: LATIN CAPITAL LIGATURE OE
2686 "ɶ", // U+0276: LATIN LETTER SMALL CAPITAL OE
2687 ],
2688 "OE",
2689 ),
2690 (
2691 &[
2692 "Ꝏ", // U+A74E: LATIN CAPITAL LETTER OO
2693 ],
2694 "OO",
2695 ),
2696 (
2697 &[
2698 "Ȣ", // U+0222: LATIN CAPITAL LETTER OU
2699 "ᴕ", // U+1D15: LATIN LETTER SMALL CAPITAL OU
2700 ],
2701 "OU",
2702 ),
2703 (
2704 &[
2705 "⒪", // U+24AA: PARENTHESIZED LATIN SMALL LETTER O
2706 ],
2707 "(o)",
2708 ),
2709 (
2710 &[
2711 "œ", // U+0153: LATIN SMALL LIGATURE OE
2712 "ᴔ", // U+1D14: LATIN SMALL LETTER TURNED OE
2713 ],
2714 "oe",
2715 ),
2716 (
2717 &[
2718 "ꝏ", // U+A74F: LATIN SMALL LETTER OO
2719 ],
2720 "oo",
2721 ),
2722 (
2723 &[
2724 "ȣ", // U+0223: LATIN SMALL LETTER OU
2725 ],
2726 "ou",
2727 ),
2728 (
2729 &[
2730 "Ƥ", // U+01A4: LATIN CAPITAL LETTER P WITH HOOK
2731 "ᴘ", // U+1D18: LATIN LETTER SMALL CAPITAL P
2732 "Ṕ", // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE
2733 "Ṗ", // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE
2734 "Ⓟ", // U+24C5: CIRCLED LATIN CAPITAL LETTER P
2735 "Ᵽ", // U+2C63: LATIN CAPITAL LETTER P WITH STROKE
2736 "Ꝑ", // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
2737 "Ꝓ", // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH
2738 "Ꝕ", // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
2739 "P", // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P
2740 ],
2741 "P",
2742 ),
2743 (
2744 &[
2745 "ƥ", // U+01A5: LATIN SMALL LETTER P WITH HOOK
2746 "ᵱ", // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE
2747 "ᵽ", // U+1D7D: LATIN SMALL LETTER P WITH STROKE
2748 "ᶈ", // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK
2749 "ṕ", // U+1E55: LATIN SMALL LETTER P WITH ACUTE
2750 "ṗ", // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE
2751 "ⓟ", // U+24DF: CIRCLED LATIN SMALL LETTER P
2752 "ꝑ", // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
2753 "ꝓ", // U+A753: LATIN SMALL LETTER P WITH FLOURISH
2754 "ꝕ", // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL
2755 "ꟼ", // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P
2756 "p", // U+FF50: FULLWIDTH LATIN SMALL LETTER P
2757 ],
2758 "p",
2759 ),
2760 (
2761 &[
2762 "⒫", // U+24AB: PARENTHESIZED LATIN SMALL LETTER P
2763 ],
2764 "(p)",
2765 ),
2766 (
2767 &[
2768 "Ɋ", // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
2769 "Ⓠ", // U+24C6: CIRCLED LATIN CAPITAL LETTER Q
2770 "Ꝗ", // U+A756: LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
2771 "Ꝙ", // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
2772 "Q", // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q
2773 ],
2774 "Q",
2775 ),
2776 (
2777 &[
2778 "ĸ", // U+0138: LATIN SMALL LETTER KRA
2779 "ɋ", // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL
2780 "ʠ", // U+02A0: LATIN SMALL LETTER Q WITH HOOK
2781 "ⓠ", // U+24E0: CIRCLED LATIN SMALL LETTER Q
2782 "ꝗ", // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
2783 "ꝙ", // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE
2784 "q", // U+FF51: FULLWIDTH LATIN SMALL LETTER Q
2785 ],
2786 "q",
2787 ),
2788 (
2789 &[
2790 "⒬", // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q
2791 ],
2792 "(q)",
2793 ),
2794 (
2795 &[
2796 "ȹ", // U+0239: LATIN SMALL LETTER QP DIGRAPH
2797 ],
2798 "qp",
2799 ),
2800 (
2801 &[
2802 "Ŕ", // U+0154: LATIN CAPITAL LETTER R WITH ACUTE
2803 "Ŗ", // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA
2804 "Ř", // U+0158: LATIN CAPITAL LETTER R WITH CARON
2805 "Ȑ", // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
2806 "Ȓ", // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE
2807 "Ɍ", // U+024C: LATIN CAPITAL LETTER R WITH STROKE
2808 "ʀ", // U+0280: LATIN LETTER SMALL CAPITAL R
2809 "ʁ", // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R
2810 "ᴙ", // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R
2811 "ᴚ", // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R
2812 "Ṙ", // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE
2813 "Ṛ", // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW
2814 "Ṝ", // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
2815 "Ṟ", // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW
2816 "Ⓡ", // U+24C7: CIRCLED LATIN CAPITAL LETTER R
2817 "Ɽ", // U+2C64: LATIN CAPITAL LETTER R WITH TAIL
2818 "Ꝛ", // U+A75A: LATIN CAPITAL LETTER R ROTUNDA
2819 "Ꞃ", // U+A782: LATIN CAPITAL LETTER INSULAR R
2820 "R", // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R
2821 ],
2822 "R",
2823 ),
2824 (
2825 &[
2826 "ŕ", // U+0155: LATIN SMALL LETTER R WITH ACUTE
2827 "ŗ", // U+0157: LATIN SMALL LETTER R WITH CEDILLA
2828 "ř", // U+0159: LATIN SMALL LETTER R WITH CARON
2829 "ȑ", // U+0211: LATIN SMALL LETTER R WITH DOUBLE GRAVE
2830 "ȓ", // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE
2831 "ɍ", // U+024D: LATIN SMALL LETTER R WITH STROKE
2832 "ɼ", // U+027C: LATIN SMALL LETTER R WITH LONG LEG
2833 "ɽ", // U+027D: LATIN SMALL LETTER R WITH TAIL
2834 "ɾ", // U+027E: LATIN SMALL LETTER R WITH FISHHOOK
2835 "ɿ", // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK
2836 "ᵣ", // U+1D63: LATIN SUBSCRIPT SMALL LETTER R
2837 "ᵲ", // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE
2838 "ᵳ", // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE
2839 "ᶉ", // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK
2840 "ṙ", // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE
2841 "ṛ", // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW
2842 "ṝ", // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
2843 "ṟ", // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW
2844 "ⓡ", // U+24E1: CIRCLED LATIN SMALL LETTER R
2845 "ꝛ", // U+A75B: LATIN SMALL LETTER R ROTUNDA
2846 "ꞃ", // U+A783: LATIN SMALL LETTER INSULAR R
2847 "r", // U+FF52: FULLWIDTH LATIN SMALL LETTER R
2848 ],
2849 "r",
2850 ),
2851 (
2852 &[
2853 "⒭", // U+24AD: PARENTHESIZED LATIN SMALL LETTER R
2854 ],
2855 "(r)",
2856 ),
2857 (
2858 &[
2859 "Ś", // U+015A: LATIN CAPITAL LETTER S WITH ACUTE
2860 "Ŝ", // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX
2861 "Ş", // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA
2862 "Š", // U+0160: LATIN CAPITAL LETTER S WITH CARON
2863 "Ș", // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW
2864 "Ṡ", // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE
2865 "Ṣ", // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW
2866 "Ṥ", // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
2867 "Ṧ", // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
2868 "Ṩ", // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
2869 "Ⓢ", // U+24C8: CIRCLED LATIN CAPITAL LETTER S
2870 "ꜱ", // U+A731: LATIN LETTER SMALL CAPITAL S
2871 "ꞅ", // U+A785: LATIN SMALL LETTER INSULAR S
2872 "S", // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S
2873 ],
2874 "S",
2875 ),
2876 (
2877 &[
2878 "ś", // U+015B: LATIN SMALL LETTER S WITH ACUTE
2879 "ŝ", // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX
2880 "ş", // U+015F: LATIN SMALL LETTER S WITH CEDILLA
2881 "š", // U+0161: LATIN SMALL LETTER S WITH CARON
2882 "ſ", // U+017F: LATIN SMALL LETTER LONG S
2883 "ș", // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW
2884 "ȿ", // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL
2885 "ʂ", // U+0282: LATIN SMALL LETTER S WITH HOOK
2886 "ᵴ", // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE
2887 "ᶊ", // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK
2888 "ṡ", // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE
2889 "ṣ", // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW
2890 "ṥ", // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
2891 "ṧ", // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
2892 "ṩ", // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
2893 "ẜ", // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
2894 "ẝ", // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE
2895 "ⓢ", // U+24E2: CIRCLED LATIN SMALL LETTER S
2896 "Ꞅ", // U+A784: LATIN CAPITAL LETTER INSULAR S
2897 "s", // U+FF53: FULLWIDTH LATIN SMALL LETTER S
2898 ],
2899 "s",
2900 ),
2901 (
2902 &[
2903 "ẞ", // U+1E9E: LATIN CAPITAL LETTER SHARP S
2904 ],
2905 "SS",
2906 ),
2907 (
2908 &[
2909 "⒮", // U+24AE: PARENTHESIZED LATIN SMALL LETTER S
2910 ],
2911 "(s)",
2912 ),
2913 (
2914 &[
2915 "ß", // U+00DF: LATIN SMALL LETTER SHARP S
2916 ],
2917 "ss",
2918 ),
2919 (
2920 &[
2921 "st", // U+FB06: LATIN SMALL LIGATURE ST
2922 ],
2923 "st",
2924 ),
2925 (
2926 &[
2927 "Ţ", // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA
2928 "Ť", // U+0164: LATIN CAPITAL LETTER T WITH CARON
2929 "Ŧ", // U+0166: LATIN CAPITAL LETTER T WITH STROKE
2930 "Ƭ", // U+01AC: LATIN CAPITAL LETTER T WITH HOOK
2931 "Ʈ", // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
2932 "Ț", // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW
2933 "Ⱦ", // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
2934 "ᴛ", // U+1D1B: LATIN LETTER SMALL CAPITAL T
2935 "Ṫ", // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE
2936 "Ṭ", // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW
2937 "Ṯ", // U+1E6E: LATIN CAPITAL LETTER T WITH LINE BELOW
2938 "Ṱ", // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
2939 "Ⓣ", // U+24C9: CIRCLED LATIN CAPITAL LETTER T
2940 "Ꞇ", // U+A786: LATIN CAPITAL LETTER INSULAR T
2941 "T", // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T
2942 ],
2943 "T",
2944 ),
2945 (
2946 &[
2947 "ţ", // U+0163: LATIN SMALL LETTER T WITH CEDILLA
2948 "ť", // U+0165: LATIN SMALL LETTER T WITH CARON
2949 "ŧ", // U+0167: LATIN SMALL LETTER T WITH STROKE
2950 "ƫ", // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK
2951 "ƭ", // U+01AD: LATIN SMALL LETTER T WITH HOOK
2952 "ț", // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW
2953 "ȶ", // U+0236: LATIN SMALL LETTER T WITH CURL
2954 "ʇ", // U+0287: LATIN SMALL LETTER TURNED T
2955 "ʈ", // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK
2956 "ᵵ", // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE
2957 "ṫ", // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE
2958 "ṭ", // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW
2959 "ṯ", // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW
2960 "ṱ", // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
2961 "ẗ", // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS
2962 "ⓣ", // U+24E3: CIRCLED LATIN SMALL LETTER T
2963 "ⱦ", // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE
2964 "t", // U+FF54: FULLWIDTH LATIN SMALL LETTER T
2965 ],
2966 "t",
2967 ),
2968 (
2969 &[
2970 "Þ", // U+00DE: LATIN CAPITAL LETTER THORN
2971 "Ꝧ", // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
2972 ],
2973 "TH",
2974 ),
2975 (
2976 &[
2977 "Ꜩ", // U+A728: LATIN CAPITAL LETTER TZ
2978 ],
2979 "TZ",
2980 ),
2981 (
2982 &[
2983 "⒯", // U+24AF: PARENTHESIZED LATIN SMALL LETTER T
2984 ],
2985 "(t)",
2986 ),
2987 (
2988 &[
2989 "ʨ", // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL
2990 ],
2991 "tc",
2992 ),
2993 (
2994 &[
2995 "þ", // U+00FE: LATIN SMALL LETTER THORN
2996 "ᵺ", // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH
2997 "ꝧ", // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
2998 ],
2999 "th",
3000 ),
3001 (
3002 &[
3003 "ʦ", // U+02A6: LATIN SMALL LETTER TS DIGRAPH
3004 ],
3005 "ts",
3006 ),
3007 (
3008 &[
3009 "ꜩ", // U+A729: LATIN SMALL LETTER TZ
3010 ],
3011 "tz",
3012 ),
3013 (
3014 &[
3015 "Ù", // U+00D9: LATIN CAPITAL LETTER U WITH GRAVE
3016 "Ú", // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE
3017 "Û", // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
3018 "Ü", // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS
3019 "Ũ", // U+0168: LATIN CAPITAL LETTER U WITH TILDE
3020 "Ū", // U+016A: LATIN CAPITAL LETTER U WITH MACRON
3021 "Ŭ", // U+016C: LATIN CAPITAL LETTER U WITH BREVE
3022 "Ů", // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE
3023 "Ű", // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
3024 "Ų", // U+0172: LATIN CAPITAL LETTER U WITH OGONEK
3025 "Ư", // U+01AF: LATIN CAPITAL LETTER U WITH HORN
3026 "Ǔ", // U+01D3: LATIN CAPITAL LETTER U WITH CARON
3027 "Ǖ", // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
3028 "Ǘ", // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
3029 "Ǚ", // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
3030 "Ǜ", // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
3031 "Ȕ", // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
3032 "Ȗ", // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE
3033 "Ʉ", // U+0244: LATIN CAPITAL LETTER U BAR
3034 "ᴜ", // U+1D1C: LATIN LETTER SMALL CAPITAL U
3035 "ᵾ", // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE
3036 "Ṳ", // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
3037 "Ṵ", // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW
3038 "Ṷ", // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
3039 "Ṹ", // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
3040 "Ṻ", // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
3041 "Ụ", // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW
3042 "Ủ", // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE
3043 "Ứ", // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE
3044 "Ừ", // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE
3045 "Ử", // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
3046 "Ữ", // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE
3047 "Ự", // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
3048 "Ⓤ", // U+24CA: CIRCLED LATIN CAPITAL LETTER U
3049 "U", // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U
3050 ],
3051 "U",
3052 ),
3053 (
3054 &[
3055 "ù", // U+00F9: LATIN SMALL LETTER U WITH GRAVE
3056 "ú", // U+00FA: LATIN SMALL LETTER U WITH ACUTE
3057 "û", // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX
3058 "ü", // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS
3059 "ũ", // U+0169: LATIN SMALL LETTER U WITH TILDE
3060 "ū", // U+016B: LATIN SMALL LETTER U WITH MACRON
3061 "ŭ", // U+016D: LATIN SMALL LETTER U WITH BREVE
3062 "ů", // U+016F: LATIN SMALL LETTER U WITH RING ABOVE
3063 "ű", // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE
3064 "ų", // U+0173: LATIN SMALL LETTER U WITH OGONEK
3065 "ư", // U+01B0: LATIN SMALL LETTER U WITH HORN
3066 "ǔ", // U+01D4: LATIN SMALL LETTER U WITH CARON
3067 "ǖ", // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
3068 "ǘ", // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
3069 "ǚ", // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON
3070 "ǜ", // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
3071 "ȕ", // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE
3072 "ȗ", // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE
3073 "ʉ", // U+0289: LATIN SMALL LETTER U BAR
3074 "ᵤ", // U+1D64: LATIN SUBSCRIPT SMALL LETTER U
3075 "ᶙ", // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK
3076 "ṳ", // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW
3077 "ṵ", // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW
3078 "ṷ", // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
3079 "ṹ", // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE
3080 "ṻ", // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
3081 "ụ", // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW
3082 "ủ", // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE
3083 "ứ", // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE
3084 "ừ", // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE
3085 "ử", // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
3086 "ữ", // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE
3087 "ự", // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW
3088 "ⓤ", // U+24E4: CIRCLED LATIN SMALL LETTER U
3089 "u", // U+FF55: FULLWIDTH LATIN SMALL LETTER U
3090 ],
3091 "u",
3092 ),
3093 (
3094 &[
3095 "⒰", // U+24B0: PARENTHESIZED LATIN SMALL LETTER U
3096 ],
3097 "(u)",
3098 ),
3099 (
3100 &[
3101 "ᵫ", // U+1D6B: LATIN SMALL LETTER UE
3102 ],
3103 "ue",
3104 ),
3105 (
3106 &[
3107 "Ʋ", // U+01B2: LATIN CAPITAL LETTER V WITH HOOK
3108 "Ʌ", // U+0245: LATIN CAPITAL LETTER TURNED V
3109 "ᴠ", // U+1D20: LATIN LETTER SMALL CAPITAL V
3110 "Ṽ", // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE
3111 "Ṿ", // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW
3112 "Ỽ", // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V
3113 "Ⓥ", // U+24CB: CIRCLED LATIN CAPITAL LETTER V
3114 "Ꝟ", // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
3115 "Ꝩ", // U+A768: LATIN CAPITAL LETTER VEND
3116 "V", // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V
3117 ],
3118 "V",
3119 ),
3120 (
3121 &[
3122 "ʋ", // U+028B: LATIN SMALL LETTER V WITH HOOK
3123 "ʌ", // U+028C: LATIN SMALL LETTER TURNED V
3124 "ᵥ", // U+1D65: LATIN SUBSCRIPT SMALL LETTER V
3125 "ᶌ", // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK
3126 "ṽ", // U+1E7D: LATIN SMALL LETTER V WITH TILDE
3127 "ṿ", // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW
3128 "ⓥ", // U+24E5: CIRCLED LATIN SMALL LETTER V
3129 "ⱱ", // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK
3130 "ⱴ", // U+2C74: LATIN SMALL LETTER V WITH CURL
3131 "ꝟ", // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE
3132 "v", // U+FF56: FULLWIDTH LATIN SMALL LETTER V
3133 ],
3134 "v",
3135 ),
3136 (
3137 &[
3138 "Ꝡ", // U+A760: LATIN CAPITAL LETTER VY
3139 ],
3140 "VY",
3141 ),
3142 (
3143 &[
3144 "⒱", // U+24B1: PARENTHESIZED LATIN SMALL LETTER V
3145 ],
3146 "(v)",
3147 ),
3148 (
3149 &[
3150 "ꝡ", // U+A761: LATIN SMALL LETTER VY
3151 ],
3152 "vy",
3153 ),
3154 (
3155 &[
3156 "Ŵ", // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX
3157 "Ƿ", // U+01F7: LATIN CAPITAL LETTER WYNN
3158 "ᴡ", // U+1D21: LATIN LETTER SMALL CAPITAL W
3159 "Ẁ", // U+1E80: LATIN CAPITAL LETTER W WITH GRAVE
3160 "Ẃ", // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE
3161 "Ẅ", // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS
3162 "Ẇ", // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE
3163 "Ẉ", // U+1E88: LATIN CAPITAL LETTER W WITH DOT BELOW
3164 "Ⓦ", // U+24CC: CIRCLED LATIN CAPITAL LETTER W
3165 "Ⱳ", // U+2C72: LATIN CAPITAL LETTER W WITH HOOK
3166 "W", // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W
3167 ],
3168 "W",
3169 ),
3170 (
3171 &[
3172 "ŵ", // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX
3173 "ƿ", // U+01BF: LATIN LETTER WYNN
3174 "ʍ", // U+028D: LATIN SMALL LETTER TURNED W
3175 "ẁ", // U+1E81: LATIN SMALL LETTER W WITH GRAVE
3176 "ẃ", // U+1E83: LATIN SMALL LETTER W WITH ACUTE
3177 "ẅ", // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS
3178 "ẇ", // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE
3179 "ẉ", // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW
3180 "ẘ", // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE
3181 "ⓦ", // U+24E6: CIRCLED LATIN SMALL LETTER W
3182 "ⱳ", // U+2C73: LATIN SMALL LETTER W WITH HOOK
3183 "w", // U+FF57: FULLWIDTH LATIN SMALL LETTER W
3184 ],
3185 "w",
3186 ),
3187 (
3188 &[
3189 "⒲", // U+24B2: PARENTHESIZED LATIN SMALL LETTER W
3190 ],
3191 "(w)",
3192 ),
3193 (
3194 &[
3195 "Ẋ", // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE
3196 "Ẍ", // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS
3197 "Ⓧ", // U+24CD: CIRCLED LATIN CAPITAL LETTER X
3198 "X", // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X
3199 ],
3200 "X",
3201 ),
3202 (
3203 &[
3204 "ᶍ", // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK
3205 "ẋ", // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE
3206 "ẍ", // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS
3207 "ₓ", // U+2093: LATIN SUBSCRIPT SMALL LETTER X
3208 "ⓧ", // U+24E7: CIRCLED LATIN SMALL LETTER X
3209 "x", // U+FF58: FULLWIDTH LATIN SMALL LETTER X
3210 ],
3211 "x",
3212 ),
3213 (
3214 &[
3215 "⒳", // U+24B3: PARENTHESIZED LATIN SMALL LETTER X
3216 ],
3217 "(x)",
3218 ),
3219 (
3220 &[
3221 "Ý", // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE
3222 "Ŷ", // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
3223 "Ÿ", // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS
3224 "Ƴ", // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK
3225 "Ȳ", // U+0232: LATIN CAPITAL LETTER Y WITH MACRON
3226 "Ɏ", // U+024E: LATIN CAPITAL LETTER Y WITH STROKE
3227 "ʏ", // U+028F: LATIN LETTER SMALL CAPITAL Y
3228 "Ẏ", // U+1E8E: LATIN CAPITAL LETTER Y WITH DOT ABOVE
3229 "Ỳ", // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE
3230 "Ỵ", // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW
3231 "Ỷ", // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE
3232 "Ỹ", // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE
3233 "Ỿ", // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP
3234 "Ⓨ", // U+24CE: CIRCLED LATIN CAPITAL LETTER Y
3235 "Y", // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y
3236 ],
3237 "Y",
3238 ),
3239 (
3240 &[
3241 "ý", // U+00FD: LATIN SMALL LETTER Y WITH ACUTE
3242 "ÿ", // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS
3243 "ŷ", // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX
3244 "ƴ", // U+01B4: LATIN SMALL LETTER Y WITH HOOK
3245 "ȳ", // U+0233: LATIN SMALL LETTER Y WITH MACRON
3246 "ɏ", // U+024F: LATIN SMALL LETTER Y WITH STROKE
3247 "ʎ", // U+028E: LATIN SMALL LETTER TURNED Y
3248 "ẏ", // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE
3249 "ẙ", // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE
3250 "ỳ", // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE
3251 "ỵ", // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW
3252 "ỷ", // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE
3253 "ỹ", // U+1EF9: LATIN SMALL LETTER Y WITH TILDE
3254 "ỿ", // U+1EFF: LATIN SMALL LETTER Y WITH LOOP
3255 "ⓨ", // U+24E8: CIRCLED LATIN SMALL LETTER Y
3256 "y", // U+FF59: FULLWIDTH LATIN SMALL LETTER Y
3257 ],
3258 "y",
3259 ),
3260 (
3261 &[
3262 "⒴", // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y
3263 ],
3264 "(y)",
3265 ),
3266 (
3267 &[
3268 "Ź", // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE
3269 "Ż", // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE
3270 "Ž", // U+017D: LATIN CAPITAL LETTER Z WITH CARON
3271 "Ƶ", // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE
3272 "Ȝ", // U+021C: LATIN CAPITAL LETTER YOGH
3273 "Ȥ", // U+0224: LATIN CAPITAL LETTER Z WITH HOOK
3274 "ᴢ", // U+1D22: LATIN LETTER SMALL CAPITAL Z
3275 "Ẑ", // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
3276 "Ẓ", // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW
3277 "Ẕ", // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW
3278 "Ⓩ", // U+24CF: CIRCLED LATIN CAPITAL LETTER Z
3279 "Ⱬ", // U+2C6B: LATIN CAPITAL LETTER Z WITH DESCENDER
3280 "Ꝣ", // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z
3281 "Z", // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z
3282 ],
3283 "Z",
3284 ),
3285 (
3286 &[
3287 "ź", // U+017A: LATIN SMALL LETTER Z WITH ACUTE
3288 "ż", // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE
3289 "ž", // U+017E: LATIN SMALL LETTER Z WITH CARON
3290 "ƶ", // U+01B6: LATIN SMALL LETTER Z WITH STROKE
3291 "ȝ", // U+021D: LATIN SMALL LETTER YOGH
3292 "ȥ", // U+0225: LATIN SMALL LETTER Z WITH HOOK
3293 "ɀ", // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL
3294 "ʐ", // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK
3295 "ʑ", // U+0291: LATIN SMALL LETTER Z WITH CURL
3296 "ᵶ", // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE
3297 "ᶎ", // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK
3298 "ẑ", // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX
3299 "ẓ", // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW
3300 "ẕ", // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW
3301 "ⓩ", // U+24E9: CIRCLED LATIN SMALL LETTER Z
3302 "ⱬ", // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER
3303 "ꝣ", // U+A763: LATIN SMALL LETTER VISIGOTHIC Z
3304 "z", // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z
3305 ],
3306 "z",
3307 ),
3308 (
3309 &[
3310 "⒵", // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z
3311 ],
3312 "(z)",
3313 ),
3314 (
3315 &[
3316 "⁰", // U+2070: SUPERSCRIPT ZERO
3317 "₀", // U+2080: SUBSCRIPT ZERO
3318 "⓪", // U+24EA: CIRCLED DIGIT ZERO
3319 "⓿", // U+24FF: NEGATIVE CIRCLED DIGIT ZERO
3320 "0", // U+FF10: FULLWIDTH DIGIT ZERO
3321 ],
3322 "0",
3323 ),
3324 (
3325 &[
3326 "¹", // U+00B9: SUPERSCRIPT ONE
3327 "₁", // U+2081: SUBSCRIPT ONE
3328 "①", // U+2460: CIRCLED DIGIT ONE
3329 "⓵", // U+24F5: DOUBLE CIRCLED DIGIT ONE
3330 "❶", // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE
3331 "➀", // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE
3332 "➊", // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
3333 "1", // U+FF11: FULLWIDTH DIGIT ONE
3334 ],
3335 "1",
3336 ),
3337 (
3338 &[
3339 "⒈", // U+2488: DIGIT ONE FULL STOP
3340 ],
3341 "1.",
3342 ),
3343 (
3344 &[
3345 "⑴", // U+2474: PARENTHESIZED DIGIT ONE
3346 ],
3347 "(1)",
3348 ),
3349 (
3350 &[
3351 "²", // U+00B2: SUPERSCRIPT TWO
3352 "₂", // U+2082: SUBSCRIPT TWO
3353 "②", // U+2461: CIRCLED DIGIT TWO
3354 "⓶", // U+24F6: DOUBLE CIRCLED DIGIT TWO
3355 "❷", // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO
3356 "➁", // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO
3357 "➋", // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
3358 "2", // U+FF12: FULLWIDTH DIGIT TWO
3359 ],
3360 "2",
3361 ),
3362 (
3363 &[
3364 "⒉", // U+2489: DIGIT TWO FULL STOP
3365 ],
3366 "2.",
3367 ),
3368 (
3369 &[
3370 "⑵", // U+2475: PARENTHESIZED DIGIT TWO
3371 ],
3372 "(2)",
3373 ),
3374 (
3375 &[
3376 "³", // U+00B3: SUPERSCRIPT THREE
3377 "₃", // U+2083: SUBSCRIPT THREE
3378 "③", // U+2462: CIRCLED DIGIT THREE
3379 "⓷", // U+24F7: DOUBLE CIRCLED DIGIT THREE
3380 "❸", // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE
3381 "➂", // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE
3382 "➌", // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
3383 "3", // U+FF13: FULLWIDTH DIGIT THREE
3384 ],
3385 "3",
3386 ),
3387 (
3388 &[
3389 "⒊", // U+248A: DIGIT THREE FULL STOP
3390 ],
3391 "3.",
3392 ),
3393 (
3394 &[
3395 "⑶", // U+2476: PARENTHESIZED DIGIT THREE
3396 ],
3397 "(3)",
3398 ),
3399 (
3400 &[
3401 "⁴", // U+2074: SUPERSCRIPT FOUR
3402 "₄", // U+2084: SUBSCRIPT FOUR
3403 "④", // U+2463: CIRCLED DIGIT FOUR
3404 "⓸", // U+24F8: DOUBLE CIRCLED DIGIT FOUR
3405 "❹", // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR
3406 "➃", // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
3407 "➍", // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
3408 "4", // U+FF14: FULLWIDTH DIGIT FOUR
3409 ],
3410 "4",
3411 ),
3412 (
3413 &[
3414 "⒋", // U+248B: DIGIT FOUR FULL STOP
3415 ],
3416 "4.",
3417 ),
3418 (
3419 &[
3420 "⑷", // U+2477: PARENTHESIZED DIGIT FOUR
3421 ],
3422 "(4)",
3423 ),
3424 (
3425 &[
3426 "⁵", // U+2075: SUPERSCRIPT FIVE
3427 "₅", // U+2085: SUBSCRIPT FIVE
3428 "⑤", // U+2464: CIRCLED DIGIT FIVE
3429 "⓹", // U+24F9: DOUBLE CIRCLED DIGIT FIVE
3430 "❺", // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE
3431 "➄", // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
3432 "➎", // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
3433 "5", // U+FF15: FULLWIDTH DIGIT FIVE
3434 ],
3435 "5",
3436 ),
3437 (
3438 &[
3439 "⒌", // U+248C: DIGIT FIVE FULL STOP
3440 ],
3441 "5.",
3442 ),
3443 (
3444 &[
3445 "⑸", // U+2478: PARENTHESIZED DIGIT FIVE
3446 ],
3447 "(5)",
3448 ),
3449 (
3450 &[
3451 "⁶", // U+2076: SUPERSCRIPT SIX
3452 "₆", // U+2086: SUBSCRIPT SIX
3453 "⑥", // U+2465: CIRCLED DIGIT SIX
3454 "⓺", // U+24FA: DOUBLE CIRCLED DIGIT SIX
3455 "❻", // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX
3456 "➅", // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX
3457 "➏", // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
3458 "6", // U+FF16: FULLWIDTH DIGIT SIX
3459 ],
3460 "6",
3461 ),
3462 (
3463 &[
3464 "⒍", // U+248D: DIGIT SIX FULL STOP
3465 ],
3466 "6.",
3467 ),
3468 (
3469 &[
3470 "⑹", // U+2479: PARENTHESIZED DIGIT SIX
3471 ],
3472 "(6)",
3473 ),
3474 (
3475 &[
3476 "⁷", // U+2077: SUPERSCRIPT SEVEN
3477 "₇", // U+2087: SUBSCRIPT SEVEN
3478 "⑦", // U+2466: CIRCLED DIGIT SEVEN
3479 "⓻", // U+24FB: DOUBLE CIRCLED DIGIT SEVEN
3480 "❼", // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
3481 "➆", // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
3482 "➐", // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
3483 "7", // U+FF17: FULLWIDTH DIGIT SEVEN
3484 ],
3485 "7",
3486 ),
3487 (
3488 &[
3489 "⒎", // U+248E: DIGIT SEVEN FULL STOP
3490 ],
3491 "7.",
3492 ),
3493 (
3494 &[
3495 "⑺", // U+247A: PARENTHESIZED DIGIT SEVEN
3496 ],
3497 "(7)",
3498 ),
3499 (
3500 &[
3501 "⁸", // U+2078: SUPERSCRIPT EIGHT
3502 "₈", // U+2088: SUBSCRIPT EIGHT
3503 "⑧", // U+2467: CIRCLED DIGIT EIGHT
3504 "⓼", // U+24FC: DOUBLE CIRCLED DIGIT EIGHT
3505 "❽", // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
3506 "➇", // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
3507 "➑", // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
3508 "8", // U+FF18: FULLWIDTH DIGIT EIGHT
3509 ],
3510 "8",
3511 ),
3512 (
3513 &[
3514 "⒏", // U+248F: DIGIT EIGHT FULL STOP
3515 ],
3516 "8.",
3517 ),
3518 (
3519 &[
3520 "⑻", // U+247B: PARENTHESIZED DIGIT EIGHT
3521 ],
3522 "(8)",
3523 ),
3524 (
3525 &[
3526 "⁹", // U+2079: SUPERSCRIPT NINE
3527 "₉", // U+2089: SUBSCRIPT NINE
3528 "⑨", // U+2468: CIRCLED DIGIT NINE
3529 "⓽", // U+24FD: DOUBLE CIRCLED DIGIT NINE
3530 "❾", // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE
3531 "➈", // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE
3532 "➒", // U+2792: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
3533 "9", // U+FF19: FULLWIDTH DIGIT NINE
3534 ],
3535 "9",
3536 ),
3537 (
3538 &[
3539 "⒐", // U+2490: DIGIT NINE FULL STOP
3540 ],
3541 "9.",
3542 ),
3543 (
3544 &[
3545 "⑼", // U+247C: PARENTHESIZED DIGIT NINE
3546 ],
3547 "(9)",
3548 ),
3549 (
3550 &[
3551 "⑩", // U+2469: CIRCLED NUMBER TEN
3552 "⓾", // U+24FE: DOUBLE CIRCLED NUMBER TEN
3553 "❿", // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN
3554 "➉", // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN
3555 "➓", // U+2793: DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
3556 ],
3557 "10",
3558 ),
3559 (
3560 &[
3561 "⒑", // U+2491: NUMBER TEN FULL STOP
3562 ],
3563 "10.",
3564 ),
3565 (
3566 &[
3567 "⑽", // U+247D: PARENTHESIZED NUMBER TEN
3568 ],
3569 "(10)",
3570 ),
3571 (
3572 &[
3573 "⑪", // U+246A: CIRCLED NUMBER ELEVEN
3574 "⓫", // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN
3575 ],
3576 "11",
3577 ),
3578 (
3579 &[
3580 "⒒", // U+2492: NUMBER ELEVEN FULL STOP
3581 ],
3582 "11.",
3583 ),
3584 (
3585 &[
3586 "⑾", // U+247E: PARENTHESIZED NUMBER ELEVEN
3587 ],
3588 "(11)",
3589 ),
3590 (
3591 &[
3592 "⑫", // U+246B: CIRCLED NUMBER TWELVE
3593 "⓬", // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE
3594 ],
3595 "12",
3596 ),
3597 (
3598 &[
3599 "⒓", // U+2493: NUMBER TWELVE FULL STOP
3600 ],
3601 "12.",
3602 ),
3603 (
3604 &[
3605 "⑿", // U+247F: PARENTHESIZED NUMBER TWELVE
3606 ],
3607 "(12)",
3608 ),
3609 (
3610 &[
3611 "⑬", // U+246C: CIRCLED NUMBER THIRTEEN
3612 "⓭", // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN
3613 ],
3614 "13",
3615 ),
3616 (
3617 &[
3618 "⒔", // U+2494: NUMBER THIRTEEN FULL STOP
3619 ],
3620 "13.",
3621 ),
3622 (
3623 &[
3624 "⒀", // U+2480: PARENTHESIZED NUMBER THIRTEEN
3625 ],
3626 "(13)",
3627 ),
3628 (
3629 &[
3630 "⑭", // U+246D: CIRCLED NUMBER FOURTEEN
3631 "⓮", // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN
3632 ],
3633 "14",
3634 ),
3635 (
3636 &[
3637 "⒕", // U+2495: NUMBER FOURTEEN FULL STOP
3638 ],
3639 "14.",
3640 ),
3641 (
3642 &[
3643 "⒁", // U+2481: PARENTHESIZED NUMBER FOURTEEN
3644 ],
3645 "(14)",
3646 ),
3647 (
3648 &[
3649 "⑮", // U+246E: CIRCLED NUMBER FIFTEEN
3650 "⓯", // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN
3651 ],
3652 "15",
3653 ),
3654 (
3655 &[
3656 "⒖", // U+2496: NUMBER FIFTEEN FULL STOP
3657 ],
3658 "15.",
3659 ),
3660 (
3661 &[
3662 "⒂", // U+2482: PARENTHESIZED NUMBER FIFTEEN
3663 ],
3664 "(15)",
3665 ),
3666 (
3667 &[
3668 "⑯", // U+246F: CIRCLED NUMBER SIXTEEN
3669 "⓰", // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN
3670 ],
3671 "16",
3672 ),
3673 (
3674 &[
3675 "⒗", // U+2497: NUMBER SIXTEEN FULL STOP
3676 ],
3677 "16.",
3678 ),
3679 (
3680 &[
3681 "⒃", // U+2483: PARENTHESIZED NUMBER SIXTEEN
3682 ],
3683 "(16)",
3684 ),
3685 (
3686 &[
3687 "⑰", // U+2470: CIRCLED NUMBER SEVENTEEN
3688 "⓱", // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN
3689 ],
3690 "17",
3691 ),
3692 (
3693 &[
3694 "⒘", // U+2498: NUMBER SEVENTEEN FULL STOP
3695 ],
3696 "17.",
3697 ),
3698 (
3699 &[
3700 "⒄", // U+2484: PARENTHESIZED NUMBER SEVENTEEN
3701 ],
3702 "(17)",
3703 ),
3704 (
3705 &[
3706 "⑱", // U+2471: CIRCLED NUMBER EIGHTEEN
3707 "⓲", // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN
3708 ],
3709 "18",
3710 ),
3711 (
3712 &[
3713 "⒙", // U+2499: NUMBER EIGHTEEN FULL STOP
3714 ],
3715 "18.",
3716 ),
3717 (
3718 &[
3719 "⒅", // U+2485: PARENTHESIZED NUMBER EIGHTEEN
3720 ],
3721 "(18)",
3722 ),
3723 (
3724 &[
3725 "⑲", // U+2472: CIRCLED NUMBER NINETEEN
3726 "⓳", // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN
3727 ],
3728 "19",
3729 ),
3730 (
3731 &[
3732 "⒚", // U+249A: NUMBER NINETEEN FULL STOP
3733 ],
3734 "19.",
3735 ),
3736 (
3737 &[
3738 "⒆", // U+2486: PARENTHESIZED NUMBER NINETEEN
3739 ],
3740 "(19)",
3741 ),
3742 (
3743 &[
3744 "⑳", // U+2473: CIRCLED NUMBER TWENTY
3745 "⓴", // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY
3746 ],
3747 "20",
3748 ),
3749 (
3750 &[
3751 "⒛", // U+249B: NUMBER TWENTY FULL STOP
3752 ],
3753 "20.",
3754 ),
3755 (
3756 &[
3757 "⒇", // U+2487: PARENTHESIZED NUMBER TWENTY
3758 ],
3759 "(20)",
3760 ),
3761 (
3762 &[
3763 "«", // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
3764 "»", // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
3765 "“", // U+201C: LEFT DOUBLE QUOTATION MARK
3766 "”", // U+201D: RIGHT DOUBLE QUOTATION MARK
3767 "„", // U+201E: DOUBLE LOW-9 QUOTATION MARK
3768 "″", // U+2033: DOUBLE PRIME
3769 "‶", // U+2036: REVERSED DOUBLE PRIME
3770 "❝", // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
3771 "❞", // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
3772 "❮", // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
3773 "❯", // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
3774 """, // U+FF02: FULLWIDTH QUOTATION MARK
3775 ],
3776 "\"",
3777 ),
3778 (
3779 &[
3780 "‘", // U+2018: LEFT SINGLE QUOTATION MARK
3781 "’", // U+2019: RIGHT SINGLE QUOTATION MARK
3782 "‚", // U+201A: SINGLE LOW-9 QUOTATION MARK
3783 "‛", // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK
3784 "′", // U+2032: PRIME
3785 "‵", // U+2035: REVERSED PRIME
3786 "‹", // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
3787 "›", // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
3788 "❛", // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
3789 "❜", // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
3790 "'", // U+FF07: FULLWIDTH APOSTROPHE
3791 ],
3792 "'",
3793 ),
3794 (
3795 &[
3796 "‐", // U+2010: HYPHEN
3797 "‑", // U+2011: NON-BREAKING HYPHEN
3798 "‒", // U+2012: FIGURE DASH
3799 "–", // U+2013: EN DASH
3800 "—", // U+2014: EM DASH
3801 "⁻", // U+207B: SUPERSCRIPT MINUS
3802 "₋", // U+208B: SUBSCRIPT MINUS
3803 "-", // U+FF0D: FULLWIDTH HYPHEN-MINUS
3804 ],
3805 "-",
3806 ),
3807 (
3808 &[
3809 "⁅", // U+2045: LEFT SQUARE BRACKET WITH QUILL
3810 "❲", // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
3811 "[", // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET
3812 ],
3813 "[",
3814 ),
3815 (
3816 &[
3817 "⁆", // U+2046: RIGHT SQUARE BRACKET WITH QUILL
3818 "❳", // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
3819 "]", // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET
3820 ],
3821 "]",
3822 ),
3823 (
3824 &[
3825 "⁽", // U+207D: SUPERSCRIPT LEFT PARENTHESIS
3826 "₍", // U+208D: SUBSCRIPT LEFT PARENTHESIS
3827 "❨", // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT
3828 "❪", // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
3829 "(", // U+FF08: FULLWIDTH LEFT PARENTHESIS
3830 ],
3831 "(",
3832 ),
3833 (
3834 &[
3835 "⸨", // U+2E28: LEFT DOUBLE PARENTHESIS
3836 ],
3837 "((",
3838 ),
3839 (
3840 &[
3841 "⁾", // U+207E: SUPERSCRIPT RIGHT PARENTHESIS
3842 "₎", // U+208E: SUBSCRIPT RIGHT PARENTHESIS
3843 "❩", // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT
3844 "❫", // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
3845 ")", // U+FF09: FULLWIDTH RIGHT PARENTHESIS
3846 ],
3847 ")",
3848 ),
3849 (
3850 &[
3851 "⸩", // U+2E29: RIGHT DOUBLE PARENTHESIS
3852 ],
3853 "))",
3854 ),
3855 (
3856 &[
3857 "❬", // U+276C: MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
3858 "❰", // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
3859 "<", // U+FF1C: FULLWIDTH LESS-THAN SIGN
3860 ],
3861 "<",
3862 ),
3863 (
3864 &[
3865 "❭", // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
3866 "❱", // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
3867 ">", // U+FF1E: FULLWIDTH GREATER-THAN SIGN
3868 ],
3869 ">",
3870 ),
3871 (
3872 &[
3873 "❴", // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT
3874 "{", // U+FF5B: FULLWIDTH LEFT CURLY BRACKET
3875 ],
3876 "{",
3877 ),
3878 (
3879 &[
3880 "❵", // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT
3881 "}", // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET
3882 ],
3883 "}",
3884 ),
3885 (
3886 &[
3887 "⁺", // U+207A: SUPERSCRIPT PLUS SIGN
3888 "₊", // U+208A: SUBSCRIPT PLUS SIGN
3889 "+", // U+FF0B: FULLWIDTH PLUS SIGN
3890 ],
3891 "+",
3892 ),
3893 (
3894 &[
3895 "⁼", // U+207C: SUPERSCRIPT EQUALS SIGN
3896 "₌", // U+208C: SUBSCRIPT EQUALS SIGN
3897 "=", // U+FF1D: FULLWIDTH EQUALS SIGN
3898 ],
3899 "=",
3900 ),
3901 (
3902 &[
3903 "!", // U+FF01: FULLWIDTH EXCLAMATION MARK
3904 ],
3905 "!",
3906 ),
3907 (
3908 &[
3909 "‼", // U+203C: DOUBLE EXCLAMATION MARK
3910 ],
3911 "!!",
3912 ),
3913 (
3914 &[
3915 "⁉", // U+2049: EXCLAMATION QUESTION MARK
3916 ],
3917 "!?",
3918 ),
3919 (
3920 &[
3921 "#", // U+FF03: FULLWIDTH NUMBER SIGN
3922 ],
3923 "#",
3924 ),
3925 (
3926 &[
3927 "$", // U+FF04: FULLWIDTH DOLLAR SIGN
3928 ],
3929 "$",
3930 ),
3931 (
3932 &[
3933 "⁒", // U+2052: COMMERCIAL MINUS SIGN
3934 "%", // U+FF05: FULLWIDTH PERCENT SIGN
3935 ],
3936 "%",
3937 ),
3938 (
3939 &[
3940 "&", // U+FF06: FULLWIDTH AMPERSAND
3941 ],
3942 "&",
3943 ),
3944 (
3945 &[
3946 "⁎", // U+204E: LOW ASTERISK
3947 "*", // U+FF0A: FULLWIDTH ASTERISK
3948 ],
3949 "*",
3950 ),
3951 (
3952 &[
3953 ",", // U+FF0C: FULLWIDTH COMMA
3954 ],
3955 ",",
3956 ),
3957 (
3958 &[
3959 ".", // U+FF0E: FULLWIDTH FULL STOP
3960 ],
3961 ".",
3962 ),
3963 (
3964 &[
3965 "⁄", // U+2044: FRACTION SLASH
3966 "/", // U+FF0F: FULLWIDTH SOLIDUS
3967 ],
3968 "/",
3969 ),
3970 (
3971 &[
3972 ":", // U+FF1A: FULLWIDTH COLON
3973 ],
3974 ":",
3975 ),
3976 (
3977 &[
3978 "⁏", // U+204F: REVERSED SEMICOLON
3979 ";", // U+FF1B: FULLWIDTH SEMICOLON
3980 ],
3981 ";",
3982 ),
3983 (
3984 &[
3985 "?", // U+FF1F: FULLWIDTH QUESTION MARK
3986 ],
3987 "?",
3988 ),
3989 (
3990 &[
3991 "⁇", // U+2047: DOUBLE QUESTION MARK
3992 ],
3993 "??",
3994 ),
3995 (
3996 &[
3997 "⁈", // U+2048: QUESTION EXCLAMATION MARK
3998 ],
3999 "?!",
4000 ),
4001 (
4002 &[
4003 "@", // U+FF20: FULLWIDTH COMMERCIAL AT
4004 ],
4005 "@",
4006 ),
4007 (
4008 &[
4009 "\", // U+FF3C: FULLWIDTH REVERSE SOLIDUS
4010 ],
4011 "\\",
4012 ),
4013 (
4014 &[
4015 "‸", // U+2038: CARET
4016 "^", // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT
4017 ],
4018 "^",
4019 ),
4020 (
4021 &[
4022 "_", // U+FF3F: FULLWIDTH LOW LINE
4023 ],
4024 "_",
4025 ),
4026 (
4027 &[
4028 "⁓", // U+2053: SWUNG DASH
4029 "~", // U+FF5E: FULLWIDTH TILDE
4030 ],
4031 "~",
4032 ),
4033 ];
4034
4035 for (characters, folded) in foldings {
4036 for &c in characters {
4037 assert_eq!(
4038 folding_using_raw_tokenizer_helper(c),
4039 folded,
4040 "testing that character \"{}\" becomes \"{}\"",
4041 c,
4042 folded
4043 );
4044 }
4045 }
4046 }
4047}