Skip to main content

ferray_strings/
classify.rs

// ferray-strings: String classification functions (is* family)
//
// Elementwise boolean classification matching NumPy's
// `numpy.strings.isalpha`, `numpy.strings.isdigit`, etc.
//
// ## REQ status
// All classification predicates below match CPython `str.is*()` element
// by element, which is exactly what `numpy.strings`/`numpy.char` delegate
// to. The non-ASCII / category-sensitive predicates (`isalpha`,
// `isdecimal`, `isdigit`, `isupper`, `islower`, `istitle`, `isspace`)
// are backed by sorted, non-overlapping codepoint-range tables derived
// LIVE from the oracle CPython 3.13 / Unicode 15.1.0 (the version backing
// the installed numpy 2.4 build this crate is verified against), because
// Rust `std` and third-party Unicode crates track a *newer* Unicode
// revision and would classify ~9000 codepoints (assigned post-15.1.0)
// differently from the numpy oracle. `isnumeric` is likewise backed by a
// 15.1.0-locked table (`NUMERIC_TYPE_RANGES`): Rust `char::is_numeric`
// tests the general categories `Nd|Nl|No`, which does NOT coincide with
// the Unicode `Numeric_Type` property CPython `str.isnumeric()` uses — it
// MISSES ideographic numerals like `一` (U+4E00, category `Lo`,
// `Numeric_Type=Numeric`) and ADDS codepoints assigned after 15.1.0.
22
23use ferray_core::Array;
24use ferray_core::dimension::Dimension;
25use ferray_core::error::FerrayResult;
26
27use crate::string_array::StringArray;
28
/// Reports whether `c` falls inside any range of `table`.
///
/// `table` must hold inclusive `(lo, hi)` codepoint ranges that are sorted
/// ascending and do not overlap. The lookup is a binary search, so a table
/// violating that precondition gives unspecified (but memory-safe) answers.
/// An empty table contains no character.
fn in_ranges(table: &[(char, char)], c: char) -> bool {
    use core::cmp::Ordering;

    table
        .binary_search_by(|&(lo, hi)| {
            // The closure must order the *range* relative to the probe `c`:
            // a range lying entirely above `c` is `Greater`, one lying
            // entirely below is `Less`, and a range containing `c` is a hit.
            if c < lo {
                Ordering::Greater
            } else if c > hi {
                Ordering::Less
            } else {
                Ordering::Equal
            }
        })
        .is_ok()
}
44
/// Inclusive codepoint ranges of characters with Unicode
/// `Numeric_Type` of Decimal **or** Digit — i.e. exactly the set for
/// which CPython's `str.isdigit()` returns `True` for a single
/// character (`Py_UNICODE_ISDIGIT`). This is broader than the Decimal-
/// only `isdecimal` (it adds superscripts/subscripts ²³⁴, circled ①,
/// parenthesized ⑴, etc.) and narrower than `isnumeric` (it excludes
/// fractions ½ and letter-numerals like Ⅻ).
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0 (the
/// version backing the installed `numpy` 2.4 build that this crate is
/// verified against):
/// `[c for c in range(0x110000) if chr(c).isdigit()]`, run-length
/// compressed to 83 ranges. Rust's `std` exposes no `Numeric_Type`
/// predicate (`char::is_numeric` covers `Nd|Nl|No` — too broad — and
/// `char::to_digit(10)` is ASCII-only — too narrow), so the table is
/// embedded directly.
///
/// Entries are sorted ascending and non-overlapping so the table can be
/// binary-searched; keep that order when regenerating it.
const DIGIT_RANGES: &[(char, char)] = &[
    ('\u{0030}', '\u{0039}'),
    ('\u{00B2}', '\u{00B3}'),
    ('\u{00B9}', '\u{00B9}'),
    ('\u{0660}', '\u{0669}'),
    ('\u{06F0}', '\u{06F9}'),
    ('\u{07C0}', '\u{07C9}'),
    ('\u{0966}', '\u{096F}'),
    ('\u{09E6}', '\u{09EF}'),
    ('\u{0A66}', '\u{0A6F}'),
    ('\u{0AE6}', '\u{0AEF}'),
    ('\u{0B66}', '\u{0B6F}'),
    ('\u{0BE6}', '\u{0BEF}'),
    ('\u{0C66}', '\u{0C6F}'),
    ('\u{0CE6}', '\u{0CEF}'),
    ('\u{0D66}', '\u{0D6F}'),
    ('\u{0DE6}', '\u{0DEF}'),
    ('\u{0E50}', '\u{0E59}'),
    ('\u{0ED0}', '\u{0ED9}'),
    ('\u{0F20}', '\u{0F29}'),
    ('\u{1040}', '\u{1049}'),
    ('\u{1090}', '\u{1099}'),
    ('\u{1369}', '\u{1371}'),
    ('\u{17E0}', '\u{17E9}'),
    ('\u{1810}', '\u{1819}'),
    ('\u{1946}', '\u{194F}'),
    ('\u{19D0}', '\u{19DA}'),
    ('\u{1A80}', '\u{1A89}'),
    ('\u{1A90}', '\u{1A99}'),
    ('\u{1B50}', '\u{1B59}'),
    ('\u{1BB0}', '\u{1BB9}'),
    ('\u{1C40}', '\u{1C49}'),
    ('\u{1C50}', '\u{1C59}'),
    ('\u{2070}', '\u{2070}'),
    ('\u{2074}', '\u{2079}'),
    ('\u{2080}', '\u{2089}'),
    ('\u{2460}', '\u{2468}'),
    ('\u{2474}', '\u{247C}'),
    ('\u{2488}', '\u{2490}'),
    ('\u{24EA}', '\u{24EA}'),
    ('\u{24F5}', '\u{24FD}'),
    ('\u{24FF}', '\u{24FF}'),
    ('\u{2776}', '\u{277E}'),
    ('\u{2780}', '\u{2788}'),
    ('\u{278A}', '\u{2792}'),
    ('\u{A620}', '\u{A629}'),
    ('\u{A8D0}', '\u{A8D9}'),
    ('\u{A900}', '\u{A909}'),
    ('\u{A9D0}', '\u{A9D9}'),
    ('\u{A9F0}', '\u{A9F9}'),
    ('\u{AA50}', '\u{AA59}'),
    ('\u{ABF0}', '\u{ABF9}'),
    ('\u{FF10}', '\u{FF19}'),
    ('\u{104A0}', '\u{104A9}'),
    ('\u{10A40}', '\u{10A43}'),
    ('\u{10D30}', '\u{10D39}'),
    ('\u{10E60}', '\u{10E68}'),
    ('\u{11052}', '\u{1105A}'),
    ('\u{11066}', '\u{1106F}'),
    ('\u{110F0}', '\u{110F9}'),
    ('\u{11136}', '\u{1113F}'),
    ('\u{111D0}', '\u{111D9}'),
    ('\u{112F0}', '\u{112F9}'),
    ('\u{11450}', '\u{11459}'),
    ('\u{114D0}', '\u{114D9}'),
    ('\u{11650}', '\u{11659}'),
    ('\u{116C0}', '\u{116C9}'),
    ('\u{11730}', '\u{11739}'),
    ('\u{118E0}', '\u{118E9}'),
    ('\u{11950}', '\u{11959}'),
    ('\u{11C50}', '\u{11C59}'),
    ('\u{11D50}', '\u{11D59}'),
    ('\u{11DA0}', '\u{11DA9}'),
    ('\u{11F50}', '\u{11F59}'),
    ('\u{16A60}', '\u{16A69}'),
    ('\u{16AC0}', '\u{16AC9}'),
    ('\u{16B50}', '\u{16B59}'),
    ('\u{1D7CE}', '\u{1D7FF}'),
    ('\u{1E140}', '\u{1E149}'),
    ('\u{1E2F0}', '\u{1E2F9}'),
    ('\u{1E4F0}', '\u{1E4F9}'),
    ('\u{1E950}', '\u{1E959}'),
    ('\u{1F100}', '\u{1F10A}'),
    ('\u{1FBF0}', '\u{1FBF9}'),
];
146
/// Inclusive codepoint ranges of characters with Unicode
/// `Numeric_Type` of Decimal — i.e. the set for which CPython's
/// `str.isdecimal()` returns `True` for a single character
/// (`Py_UNICODE_ISDECIMAL`). This is the Decimal-only subset of
/// [`DIGIT_RANGES`]: it drops superscripts/subscripts (`²³⁴`), circled
/// (`①`) and parenthesized (`⑴`) digits, keeping only true positional
/// decimal digits (ASCII `0-9`, fullwidth `0-9`, Arabic-Indic `٠-٩`,
/// Devanagari, etc.).
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0:
/// `[c for c in range(0x110000) if chr(c).isdecimal()]`, run-length
/// compressed to 64 ranges.
///
/// Entries are sorted ascending and non-overlapping so the table can be
/// binary-searched; keep that order when regenerating it.
const DECIMAL_RANGES: &[(char, char)] = &[
    ('\u{0030}', '\u{0039}'),
    ('\u{0660}', '\u{0669}'),
    ('\u{06F0}', '\u{06F9}'),
    ('\u{07C0}', '\u{07C9}'),
    ('\u{0966}', '\u{096F}'),
    ('\u{09E6}', '\u{09EF}'),
    ('\u{0A66}', '\u{0A6F}'),
    ('\u{0AE6}', '\u{0AEF}'),
    ('\u{0B66}', '\u{0B6F}'),
    ('\u{0BE6}', '\u{0BEF}'),
    ('\u{0C66}', '\u{0C6F}'),
    ('\u{0CE6}', '\u{0CEF}'),
    ('\u{0D66}', '\u{0D6F}'),
    ('\u{0DE6}', '\u{0DEF}'),
    ('\u{0E50}', '\u{0E59}'),
    ('\u{0ED0}', '\u{0ED9}'),
    ('\u{0F20}', '\u{0F29}'),
    ('\u{1040}', '\u{1049}'),
    ('\u{1090}', '\u{1099}'),
    ('\u{17E0}', '\u{17E9}'),
    ('\u{1810}', '\u{1819}'),
    ('\u{1946}', '\u{194F}'),
    ('\u{19D0}', '\u{19D9}'),
    ('\u{1A80}', '\u{1A89}'),
    ('\u{1A90}', '\u{1A99}'),
    ('\u{1B50}', '\u{1B59}'),
    ('\u{1BB0}', '\u{1BB9}'),
    ('\u{1C40}', '\u{1C49}'),
    ('\u{1C50}', '\u{1C59}'),
    ('\u{A620}', '\u{A629}'),
    ('\u{A8D0}', '\u{A8D9}'),
    ('\u{A900}', '\u{A909}'),
    ('\u{A9D0}', '\u{A9D9}'),
    ('\u{A9F0}', '\u{A9F9}'),
    ('\u{AA50}', '\u{AA59}'),
    ('\u{ABF0}', '\u{ABF9}'),
    ('\u{FF10}', '\u{FF19}'),
    ('\u{104A0}', '\u{104A9}'),
    ('\u{10D30}', '\u{10D39}'),
    ('\u{11066}', '\u{1106F}'),
    ('\u{110F0}', '\u{110F9}'),
    ('\u{11136}', '\u{1113F}'),
    ('\u{111D0}', '\u{111D9}'),
    ('\u{112F0}', '\u{112F9}'),
    ('\u{11450}', '\u{11459}'),
    ('\u{114D0}', '\u{114D9}'),
    ('\u{11650}', '\u{11659}'),
    ('\u{116C0}', '\u{116C9}'),
    ('\u{11730}', '\u{11739}'),
    ('\u{118E0}', '\u{118E9}'),
    ('\u{11950}', '\u{11959}'),
    ('\u{11C50}', '\u{11C59}'),
    ('\u{11D50}', '\u{11D59}'),
    ('\u{11DA0}', '\u{11DA9}'),
    ('\u{11F50}', '\u{11F59}'),
    ('\u{16A60}', '\u{16A69}'),
    ('\u{16AC0}', '\u{16AC9}'),
    ('\u{16B50}', '\u{16B59}'),
    ('\u{1D7CE}', '\u{1D7FF}'),
    ('\u{1E140}', '\u{1E149}'),
    ('\u{1E2F0}', '\u{1E2F9}'),
    ('\u{1E4F0}', '\u{1E4F9}'),
    ('\u{1E950}', '\u{1E959}'),
    ('\u{1FBF0}', '\u{1FBF9}'),
];
225
/// Inclusive codepoint ranges of characters in Unicode general category
/// `L*` (`Lu | Ll | Lt | Lm | Lo`), i.e. exactly the set for which
/// CPython's `str.isalpha()` returns `True` for a single character. This
/// is NARROWER than Rust's `char::is_alphabetic` (the `Alphabetic`
/// derived property, which also includes combining marks `Mn`, letter-
/// numbers `Nl` such as roman numerals `Ⅻ`, etc.).
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0:
/// `[c for c in range(0x110000) if chr(c).isalpha()]`, run-length
/// compressed to 660 ranges.
///
/// Entries are sorted ascending and non-overlapping so the table can be
/// binary-searched; keep that order when regenerating it.
const ALPHA_RANGES: &[(char, char)] = &[
    ('\u{0041}', '\u{005A}'),
    ('\u{0061}', '\u{007A}'),
    ('\u{00AA}', '\u{00AA}'),
    ('\u{00B5}', '\u{00B5}'),
    ('\u{00BA}', '\u{00BA}'),
    ('\u{00C0}', '\u{00D6}'),
    ('\u{00D8}', '\u{00F6}'),
    ('\u{00F8}', '\u{02C1}'),
    ('\u{02C6}', '\u{02D1}'),
    ('\u{02E0}', '\u{02E4}'),
    ('\u{02EC}', '\u{02EC}'),
    ('\u{02EE}', '\u{02EE}'),
    ('\u{0370}', '\u{0374}'),
    ('\u{0376}', '\u{0377}'),
    ('\u{037A}', '\u{037D}'),
    ('\u{037F}', '\u{037F}'),
    ('\u{0386}', '\u{0386}'),
    ('\u{0388}', '\u{038A}'),
    ('\u{038C}', '\u{038C}'),
    ('\u{038E}', '\u{03A1}'),
    ('\u{03A3}', '\u{03F5}'),
    ('\u{03F7}', '\u{0481}'),
    ('\u{048A}', '\u{052F}'),
    ('\u{0531}', '\u{0556}'),
    ('\u{0559}', '\u{0559}'),
    ('\u{0560}', '\u{0588}'),
    ('\u{05D0}', '\u{05EA}'),
    ('\u{05EF}', '\u{05F2}'),
    ('\u{0620}', '\u{064A}'),
    ('\u{066E}', '\u{066F}'),
    ('\u{0671}', '\u{06D3}'),
    ('\u{06D5}', '\u{06D5}'),
    ('\u{06E5}', '\u{06E6}'),
    ('\u{06EE}', '\u{06EF}'),
    ('\u{06FA}', '\u{06FC}'),
    ('\u{06FF}', '\u{06FF}'),
    ('\u{0710}', '\u{0710}'),
    ('\u{0712}', '\u{072F}'),
    ('\u{074D}', '\u{07A5}'),
    ('\u{07B1}', '\u{07B1}'),
    ('\u{07CA}', '\u{07EA}'),
    ('\u{07F4}', '\u{07F5}'),
    ('\u{07FA}', '\u{07FA}'),
    ('\u{0800}', '\u{0815}'),
    ('\u{081A}', '\u{081A}'),
    ('\u{0824}', '\u{0824}'),
    ('\u{0828}', '\u{0828}'),
    ('\u{0840}', '\u{0858}'),
    ('\u{0860}', '\u{086A}'),
    ('\u{0870}', '\u{0887}'),
    ('\u{0889}', '\u{088E}'),
    ('\u{08A0}', '\u{08C9}'),
    ('\u{0904}', '\u{0939}'),
    ('\u{093D}', '\u{093D}'),
    ('\u{0950}', '\u{0950}'),
    ('\u{0958}', '\u{0961}'),
    ('\u{0971}', '\u{0980}'),
    ('\u{0985}', '\u{098C}'),
    ('\u{098F}', '\u{0990}'),
    ('\u{0993}', '\u{09A8}'),
    ('\u{09AA}', '\u{09B0}'),
    ('\u{09B2}', '\u{09B2}'),
    ('\u{09B6}', '\u{09B9}'),
    ('\u{09BD}', '\u{09BD}'),
    ('\u{09CE}', '\u{09CE}'),
    ('\u{09DC}', '\u{09DD}'),
    ('\u{09DF}', '\u{09E1}'),
    ('\u{09F0}', '\u{09F1}'),
    ('\u{09FC}', '\u{09FC}'),
    ('\u{0A05}', '\u{0A0A}'),
    ('\u{0A0F}', '\u{0A10}'),
    ('\u{0A13}', '\u{0A28}'),
    ('\u{0A2A}', '\u{0A30}'),
    ('\u{0A32}', '\u{0A33}'),
    ('\u{0A35}', '\u{0A36}'),
    ('\u{0A38}', '\u{0A39}'),
    ('\u{0A59}', '\u{0A5C}'),
    ('\u{0A5E}', '\u{0A5E}'),
    ('\u{0A72}', '\u{0A74}'),
    ('\u{0A85}', '\u{0A8D}'),
    ('\u{0A8F}', '\u{0A91}'),
    ('\u{0A93}', '\u{0AA8}'),
    ('\u{0AAA}', '\u{0AB0}'),
    ('\u{0AB2}', '\u{0AB3}'),
    ('\u{0AB5}', '\u{0AB9}'),
    ('\u{0ABD}', '\u{0ABD}'),
    ('\u{0AD0}', '\u{0AD0}'),
    ('\u{0AE0}', '\u{0AE1}'),
    ('\u{0AF9}', '\u{0AF9}'),
    ('\u{0B05}', '\u{0B0C}'),
    ('\u{0B0F}', '\u{0B10}'),
    ('\u{0B13}', '\u{0B28}'),
    ('\u{0B2A}', '\u{0B30}'),
    ('\u{0B32}', '\u{0B33}'),
    ('\u{0B35}', '\u{0B39}'),
    ('\u{0B3D}', '\u{0B3D}'),
    ('\u{0B5C}', '\u{0B5D}'),
    ('\u{0B5F}', '\u{0B61}'),
    ('\u{0B71}', '\u{0B71}'),
    ('\u{0B83}', '\u{0B83}'),
    ('\u{0B85}', '\u{0B8A}'),
    ('\u{0B8E}', '\u{0B90}'),
    ('\u{0B92}', '\u{0B95}'),
    ('\u{0B99}', '\u{0B9A}'),
    ('\u{0B9C}', '\u{0B9C}'),
    ('\u{0B9E}', '\u{0B9F}'),
    ('\u{0BA3}', '\u{0BA4}'),
    ('\u{0BA8}', '\u{0BAA}'),
    ('\u{0BAE}', '\u{0BB9}'),
    ('\u{0BD0}', '\u{0BD0}'),
    ('\u{0C05}', '\u{0C0C}'),
    ('\u{0C0E}', '\u{0C10}'),
    ('\u{0C12}', '\u{0C28}'),
    ('\u{0C2A}', '\u{0C39}'),
    ('\u{0C3D}', '\u{0C3D}'),
    ('\u{0C58}', '\u{0C5A}'),
    ('\u{0C5D}', '\u{0C5D}'),
    ('\u{0C60}', '\u{0C61}'),
    ('\u{0C80}', '\u{0C80}'),
    ('\u{0C85}', '\u{0C8C}'),
    ('\u{0C8E}', '\u{0C90}'),
    ('\u{0C92}', '\u{0CA8}'),
    ('\u{0CAA}', '\u{0CB3}'),
    ('\u{0CB5}', '\u{0CB9}'),
    ('\u{0CBD}', '\u{0CBD}'),
    ('\u{0CDD}', '\u{0CDE}'),
    ('\u{0CE0}', '\u{0CE1}'),
    ('\u{0CF1}', '\u{0CF2}'),
    ('\u{0D04}', '\u{0D0C}'),
    ('\u{0D0E}', '\u{0D10}'),
    ('\u{0D12}', '\u{0D3A}'),
    ('\u{0D3D}', '\u{0D3D}'),
    ('\u{0D4E}', '\u{0D4E}'),
    ('\u{0D54}', '\u{0D56}'),
    ('\u{0D5F}', '\u{0D61}'),
    ('\u{0D7A}', '\u{0D7F}'),
    ('\u{0D85}', '\u{0D96}'),
    ('\u{0D9A}', '\u{0DB1}'),
    ('\u{0DB3}', '\u{0DBB}'),
    ('\u{0DBD}', '\u{0DBD}'),
    ('\u{0DC0}', '\u{0DC6}'),
    ('\u{0E01}', '\u{0E30}'),
    ('\u{0E32}', '\u{0E33}'),
    ('\u{0E40}', '\u{0E46}'),
    ('\u{0E81}', '\u{0E82}'),
    ('\u{0E84}', '\u{0E84}'),
    ('\u{0E86}', '\u{0E8A}'),
    ('\u{0E8C}', '\u{0EA3}'),
    ('\u{0EA5}', '\u{0EA5}'),
    ('\u{0EA7}', '\u{0EB0}'),
    ('\u{0EB2}', '\u{0EB3}'),
    ('\u{0EBD}', '\u{0EBD}'),
    ('\u{0EC0}', '\u{0EC4}'),
    ('\u{0EC6}', '\u{0EC6}'),
    ('\u{0EDC}', '\u{0EDF}'),
    ('\u{0F00}', '\u{0F00}'),
    ('\u{0F40}', '\u{0F47}'),
    ('\u{0F49}', '\u{0F6C}'),
    ('\u{0F88}', '\u{0F8C}'),
    ('\u{1000}', '\u{102A}'),
    ('\u{103F}', '\u{103F}'),
    ('\u{1050}', '\u{1055}'),
    ('\u{105A}', '\u{105D}'),
    ('\u{1061}', '\u{1061}'),
    ('\u{1065}', '\u{1066}'),
    ('\u{106E}', '\u{1070}'),
    ('\u{1075}', '\u{1081}'),
    ('\u{108E}', '\u{108E}'),
    ('\u{10A0}', '\u{10C5}'),
    ('\u{10C7}', '\u{10C7}'),
    ('\u{10CD}', '\u{10CD}'),
    ('\u{10D0}', '\u{10FA}'),
    ('\u{10FC}', '\u{1248}'),
    ('\u{124A}', '\u{124D}'),
    ('\u{1250}', '\u{1256}'),
    ('\u{1258}', '\u{1258}'),
    ('\u{125A}', '\u{125D}'),
    ('\u{1260}', '\u{1288}'),
    ('\u{128A}', '\u{128D}'),
    ('\u{1290}', '\u{12B0}'),
    ('\u{12B2}', '\u{12B5}'),
    ('\u{12B8}', '\u{12BE}'),
    ('\u{12C0}', '\u{12C0}'),
    ('\u{12C2}', '\u{12C5}'),
    ('\u{12C8}', '\u{12D6}'),
    ('\u{12D8}', '\u{1310}'),
    ('\u{1312}', '\u{1315}'),
    ('\u{1318}', '\u{135A}'),
    ('\u{1380}', '\u{138F}'),
    ('\u{13A0}', '\u{13F5}'),
    ('\u{13F8}', '\u{13FD}'),
    ('\u{1401}', '\u{166C}'),
    ('\u{166F}', '\u{167F}'),
    ('\u{1681}', '\u{169A}'),
    ('\u{16A0}', '\u{16EA}'),
    ('\u{16F1}', '\u{16F8}'),
    ('\u{1700}', '\u{1711}'),
    ('\u{171F}', '\u{1731}'),
    ('\u{1740}', '\u{1751}'),
    ('\u{1760}', '\u{176C}'),
    ('\u{176E}', '\u{1770}'),
    ('\u{1780}', '\u{17B3}'),
    ('\u{17D7}', '\u{17D7}'),
    ('\u{17DC}', '\u{17DC}'),
    ('\u{1820}', '\u{1878}'),
    ('\u{1880}', '\u{1884}'),
    ('\u{1887}', '\u{18A8}'),
    ('\u{18AA}', '\u{18AA}'),
    ('\u{18B0}', '\u{18F5}'),
    ('\u{1900}', '\u{191E}'),
    ('\u{1950}', '\u{196D}'),
    ('\u{1970}', '\u{1974}'),
    ('\u{1980}', '\u{19AB}'),
    ('\u{19B0}', '\u{19C9}'),
    ('\u{1A00}', '\u{1A16}'),
    ('\u{1A20}', '\u{1A54}'),
    ('\u{1AA7}', '\u{1AA7}'),
    ('\u{1B05}', '\u{1B33}'),
    ('\u{1B45}', '\u{1B4C}'),
    ('\u{1B83}', '\u{1BA0}'),
    ('\u{1BAE}', '\u{1BAF}'),
    ('\u{1BBA}', '\u{1BE5}'),
    ('\u{1C00}', '\u{1C23}'),
    ('\u{1C4D}', '\u{1C4F}'),
    ('\u{1C5A}', '\u{1C7D}'),
    ('\u{1C80}', '\u{1C88}'),
    ('\u{1C90}', '\u{1CBA}'),
    ('\u{1CBD}', '\u{1CBF}'),
    ('\u{1CE9}', '\u{1CEC}'),
    ('\u{1CEE}', '\u{1CF3}'),
    ('\u{1CF5}', '\u{1CF6}'),
    ('\u{1CFA}', '\u{1CFA}'),
    ('\u{1D00}', '\u{1DBF}'),
    ('\u{1E00}', '\u{1F15}'),
    ('\u{1F18}', '\u{1F1D}'),
    ('\u{1F20}', '\u{1F45}'),
    ('\u{1F48}', '\u{1F4D}'),
    ('\u{1F50}', '\u{1F57}'),
    ('\u{1F59}', '\u{1F59}'),
    ('\u{1F5B}', '\u{1F5B}'),
    ('\u{1F5D}', '\u{1F5D}'),
    ('\u{1F5F}', '\u{1F7D}'),
    ('\u{1F80}', '\u{1FB4}'),
    ('\u{1FB6}', '\u{1FBC}'),
    ('\u{1FBE}', '\u{1FBE}'),
    ('\u{1FC2}', '\u{1FC4}'),
    ('\u{1FC6}', '\u{1FCC}'),
    ('\u{1FD0}', '\u{1FD3}'),
    ('\u{1FD6}', '\u{1FDB}'),
    ('\u{1FE0}', '\u{1FEC}'),
    ('\u{1FF2}', '\u{1FF4}'),
    ('\u{1FF6}', '\u{1FFC}'),
    ('\u{2071}', '\u{2071}'),
    ('\u{207F}', '\u{207F}'),
    ('\u{2090}', '\u{209C}'),
    ('\u{2102}', '\u{2102}'),
    ('\u{2107}', '\u{2107}'),
    ('\u{210A}', '\u{2113}'),
    ('\u{2115}', '\u{2115}'),
    ('\u{2119}', '\u{211D}'),
    ('\u{2124}', '\u{2124}'),
    ('\u{2126}', '\u{2126}'),
    ('\u{2128}', '\u{2128}'),
    ('\u{212A}', '\u{212D}'),
    ('\u{212F}', '\u{2139}'),
    ('\u{213C}', '\u{213F}'),
    ('\u{2145}', '\u{2149}'),
    ('\u{214E}', '\u{214E}'),
    ('\u{2183}', '\u{2184}'),
    ('\u{2C00}', '\u{2CE4}'),
    ('\u{2CEB}', '\u{2CEE}'),
    ('\u{2CF2}', '\u{2CF3}'),
    ('\u{2D00}', '\u{2D25}'),
    ('\u{2D27}', '\u{2D27}'),
    ('\u{2D2D}', '\u{2D2D}'),
    ('\u{2D30}', '\u{2D67}'),
    ('\u{2D6F}', '\u{2D6F}'),
    ('\u{2D80}', '\u{2D96}'),
    ('\u{2DA0}', '\u{2DA6}'),
    ('\u{2DA8}', '\u{2DAE}'),
    ('\u{2DB0}', '\u{2DB6}'),
    ('\u{2DB8}', '\u{2DBE}'),
    ('\u{2DC0}', '\u{2DC6}'),
    ('\u{2DC8}', '\u{2DCE}'),
    ('\u{2DD0}', '\u{2DD6}'),
    ('\u{2DD8}', '\u{2DDE}'),
    ('\u{2E2F}', '\u{2E2F}'),
    ('\u{3005}', '\u{3006}'),
    ('\u{3031}', '\u{3035}'),
    ('\u{303B}', '\u{303C}'),
    ('\u{3041}', '\u{3096}'),
    ('\u{309D}', '\u{309F}'),
    ('\u{30A1}', '\u{30FA}'),
    ('\u{30FC}', '\u{30FF}'),
    ('\u{3105}', '\u{312F}'),
    ('\u{3131}', '\u{318E}'),
    ('\u{31A0}', '\u{31BF}'),
    ('\u{31F0}', '\u{31FF}'),
    ('\u{3400}', '\u{4DBF}'),
    ('\u{4E00}', '\u{A48C}'),
    ('\u{A4D0}', '\u{A4FD}'),
    ('\u{A500}', '\u{A60C}'),
    ('\u{A610}', '\u{A61F}'),
    ('\u{A62A}', '\u{A62B}'),
    ('\u{A640}', '\u{A66E}'),
    ('\u{A67F}', '\u{A69D}'),
    ('\u{A6A0}', '\u{A6E5}'),
    ('\u{A717}', '\u{A71F}'),
    ('\u{A722}', '\u{A788}'),
    ('\u{A78B}', '\u{A7CA}'),
    ('\u{A7D0}', '\u{A7D1}'),
    ('\u{A7D3}', '\u{A7D3}'),
    ('\u{A7D5}', '\u{A7D9}'),
    ('\u{A7F2}', '\u{A801}'),
    ('\u{A803}', '\u{A805}'),
    ('\u{A807}', '\u{A80A}'),
    ('\u{A80C}', '\u{A822}'),
    ('\u{A840}', '\u{A873}'),
    ('\u{A882}', '\u{A8B3}'),
    ('\u{A8F2}', '\u{A8F7}'),
    ('\u{A8FB}', '\u{A8FB}'),
    ('\u{A8FD}', '\u{A8FE}'),
    ('\u{A90A}', '\u{A925}'),
    ('\u{A930}', '\u{A946}'),
    ('\u{A960}', '\u{A97C}'),
    ('\u{A984}', '\u{A9B2}'),
    ('\u{A9CF}', '\u{A9CF}'),
    ('\u{A9E0}', '\u{A9E4}'),
    ('\u{A9E6}', '\u{A9EF}'),
    ('\u{A9FA}', '\u{A9FE}'),
    ('\u{AA00}', '\u{AA28}'),
    ('\u{AA40}', '\u{AA42}'),
    ('\u{AA44}', '\u{AA4B}'),
    ('\u{AA60}', '\u{AA76}'),
    ('\u{AA7A}', '\u{AA7A}'),
    ('\u{AA7E}', '\u{AAAF}'),
    ('\u{AAB1}', '\u{AAB1}'),
    ('\u{AAB5}', '\u{AAB6}'),
    ('\u{AAB9}', '\u{AABD}'),
    ('\u{AAC0}', '\u{AAC0}'),
    ('\u{AAC2}', '\u{AAC2}'),
    ('\u{AADB}', '\u{AADD}'),
    ('\u{AAE0}', '\u{AAEA}'),
    ('\u{AAF2}', '\u{AAF4}'),
    ('\u{AB01}', '\u{AB06}'),
    ('\u{AB09}', '\u{AB0E}'),
    ('\u{AB11}', '\u{AB16}'),
    ('\u{AB20}', '\u{AB26}'),
    ('\u{AB28}', '\u{AB2E}'),
    ('\u{AB30}', '\u{AB5A}'),
    ('\u{AB5C}', '\u{AB69}'),
    ('\u{AB70}', '\u{ABE2}'),
    ('\u{AC00}', '\u{D7A3}'),
    ('\u{D7B0}', '\u{D7C6}'),
    ('\u{D7CB}', '\u{D7FB}'),
    ('\u{F900}', '\u{FA6D}'),
    ('\u{FA70}', '\u{FAD9}'),
    ('\u{FB00}', '\u{FB06}'),
    ('\u{FB13}', '\u{FB17}'),
    ('\u{FB1D}', '\u{FB1D}'),
    ('\u{FB1F}', '\u{FB28}'),
    ('\u{FB2A}', '\u{FB36}'),
    ('\u{FB38}', '\u{FB3C}'),
    ('\u{FB3E}', '\u{FB3E}'),
    ('\u{FB40}', '\u{FB41}'),
    ('\u{FB43}', '\u{FB44}'),
    ('\u{FB46}', '\u{FBB1}'),
    ('\u{FBD3}', '\u{FD3D}'),
    ('\u{FD50}', '\u{FD8F}'),
    ('\u{FD92}', '\u{FDC7}'),
    ('\u{FDF0}', '\u{FDFB}'),
    ('\u{FE70}', '\u{FE74}'),
    ('\u{FE76}', '\u{FEFC}'),
    ('\u{FF21}', '\u{FF3A}'),
    ('\u{FF41}', '\u{FF5A}'),
    ('\u{FF66}', '\u{FFBE}'),
    ('\u{FFC2}', '\u{FFC7}'),
    ('\u{FFCA}', '\u{FFCF}'),
    ('\u{FFD2}', '\u{FFD7}'),
    ('\u{FFDA}', '\u{FFDC}'),
    ('\u{10000}', '\u{1000B}'),
    ('\u{1000D}', '\u{10026}'),
    ('\u{10028}', '\u{1003A}'),
    ('\u{1003C}', '\u{1003D}'),
    ('\u{1003F}', '\u{1004D}'),
    ('\u{10050}', '\u{1005D}'),
    ('\u{10080}', '\u{100FA}'),
    ('\u{10280}', '\u{1029C}'),
    ('\u{102A0}', '\u{102D0}'),
    ('\u{10300}', '\u{1031F}'),
    ('\u{1032D}', '\u{10340}'),
    ('\u{10342}', '\u{10349}'),
    ('\u{10350}', '\u{10375}'),
    ('\u{10380}', '\u{1039D}'),
    ('\u{103A0}', '\u{103C3}'),
    ('\u{103C8}', '\u{103CF}'),
    ('\u{10400}', '\u{1049D}'),
    ('\u{104B0}', '\u{104D3}'),
    ('\u{104D8}', '\u{104FB}'),
    ('\u{10500}', '\u{10527}'),
    ('\u{10530}', '\u{10563}'),
    ('\u{10570}', '\u{1057A}'),
    ('\u{1057C}', '\u{1058A}'),
    ('\u{1058C}', '\u{10592}'),
    ('\u{10594}', '\u{10595}'),
    ('\u{10597}', '\u{105A1}'),
    ('\u{105A3}', '\u{105B1}'),
    ('\u{105B3}', '\u{105B9}'),
    ('\u{105BB}', '\u{105BC}'),
    ('\u{10600}', '\u{10736}'),
    ('\u{10740}', '\u{10755}'),
    ('\u{10760}', '\u{10767}'),
    ('\u{10780}', '\u{10785}'),
    ('\u{10787}', '\u{107B0}'),
    ('\u{107B2}', '\u{107BA}'),
    ('\u{10800}', '\u{10805}'),
    ('\u{10808}', '\u{10808}'),
    ('\u{1080A}', '\u{10835}'),
    ('\u{10837}', '\u{10838}'),
    ('\u{1083C}', '\u{1083C}'),
    ('\u{1083F}', '\u{10855}'),
    ('\u{10860}', '\u{10876}'),
    ('\u{10880}', '\u{1089E}'),
    ('\u{108E0}', '\u{108F2}'),
    ('\u{108F4}', '\u{108F5}'),
    ('\u{10900}', '\u{10915}'),
    ('\u{10920}', '\u{10939}'),
    ('\u{10980}', '\u{109B7}'),
    ('\u{109BE}', '\u{109BF}'),
    ('\u{10A00}', '\u{10A00}'),
    ('\u{10A10}', '\u{10A13}'),
    ('\u{10A15}', '\u{10A17}'),
    ('\u{10A19}', '\u{10A35}'),
    ('\u{10A60}', '\u{10A7C}'),
    ('\u{10A80}', '\u{10A9C}'),
    ('\u{10AC0}', '\u{10AC7}'),
    ('\u{10AC9}', '\u{10AE4}'),
    ('\u{10B00}', '\u{10B35}'),
    ('\u{10B40}', '\u{10B55}'),
    ('\u{10B60}', '\u{10B72}'),
    ('\u{10B80}', '\u{10B91}'),
    ('\u{10C00}', '\u{10C48}'),
    ('\u{10C80}', '\u{10CB2}'),
    ('\u{10CC0}', '\u{10CF2}'),
    ('\u{10D00}', '\u{10D23}'),
    ('\u{10E80}', '\u{10EA9}'),
    ('\u{10EB0}', '\u{10EB1}'),
    ('\u{10F00}', '\u{10F1C}'),
    ('\u{10F27}', '\u{10F27}'),
    ('\u{10F30}', '\u{10F45}'),
    ('\u{10F70}', '\u{10F81}'),
    ('\u{10FB0}', '\u{10FC4}'),
    ('\u{10FE0}', '\u{10FF6}'),
    ('\u{11003}', '\u{11037}'),
    ('\u{11071}', '\u{11072}'),
    ('\u{11075}', '\u{11075}'),
    ('\u{11083}', '\u{110AF}'),
    ('\u{110D0}', '\u{110E8}'),
    ('\u{11103}', '\u{11126}'),
    ('\u{11144}', '\u{11144}'),
    ('\u{11147}', '\u{11147}'),
    ('\u{11150}', '\u{11172}'),
    ('\u{11176}', '\u{11176}'),
    ('\u{11183}', '\u{111B2}'),
    ('\u{111C1}', '\u{111C4}'),
    ('\u{111DA}', '\u{111DA}'),
    ('\u{111DC}', '\u{111DC}'),
    ('\u{11200}', '\u{11211}'),
    ('\u{11213}', '\u{1122B}'),
    ('\u{1123F}', '\u{11240}'),
    ('\u{11280}', '\u{11286}'),
    ('\u{11288}', '\u{11288}'),
    ('\u{1128A}', '\u{1128D}'),
    ('\u{1128F}', '\u{1129D}'),
    ('\u{1129F}', '\u{112A8}'),
    ('\u{112B0}', '\u{112DE}'),
    ('\u{11305}', '\u{1130C}'),
    ('\u{1130F}', '\u{11310}'),
    ('\u{11313}', '\u{11328}'),
    ('\u{1132A}', '\u{11330}'),
    ('\u{11332}', '\u{11333}'),
    ('\u{11335}', '\u{11339}'),
    ('\u{1133D}', '\u{1133D}'),
    ('\u{11350}', '\u{11350}'),
    ('\u{1135D}', '\u{11361}'),
    ('\u{11400}', '\u{11434}'),
    ('\u{11447}', '\u{1144A}'),
    ('\u{1145F}', '\u{11461}'),
    ('\u{11480}', '\u{114AF}'),
    ('\u{114C4}', '\u{114C5}'),
    ('\u{114C7}', '\u{114C7}'),
    ('\u{11580}', '\u{115AE}'),
    ('\u{115D8}', '\u{115DB}'),
    ('\u{11600}', '\u{1162F}'),
    ('\u{11644}', '\u{11644}'),
    ('\u{11680}', '\u{116AA}'),
    ('\u{116B8}', '\u{116B8}'),
    ('\u{11700}', '\u{1171A}'),
    ('\u{11740}', '\u{11746}'),
    ('\u{11800}', '\u{1182B}'),
    ('\u{118A0}', '\u{118DF}'),
    ('\u{118FF}', '\u{11906}'),
    ('\u{11909}', '\u{11909}'),
    ('\u{1190C}', '\u{11913}'),
    ('\u{11915}', '\u{11916}'),
    ('\u{11918}', '\u{1192F}'),
    ('\u{1193F}', '\u{1193F}'),
    ('\u{11941}', '\u{11941}'),
    ('\u{119A0}', '\u{119A7}'),
    ('\u{119AA}', '\u{119D0}'),
    ('\u{119E1}', '\u{119E1}'),
    ('\u{119E3}', '\u{119E3}'),
    ('\u{11A00}', '\u{11A00}'),
    ('\u{11A0B}', '\u{11A32}'),
    ('\u{11A3A}', '\u{11A3A}'),
    ('\u{11A50}', '\u{11A50}'),
    ('\u{11A5C}', '\u{11A89}'),
    ('\u{11A9D}', '\u{11A9D}'),
    ('\u{11AB0}', '\u{11AF8}'),
    ('\u{11C00}', '\u{11C08}'),
    ('\u{11C0A}', '\u{11C2E}'),
    ('\u{11C40}', '\u{11C40}'),
    ('\u{11C72}', '\u{11C8F}'),
    ('\u{11D00}', '\u{11D06}'),
    ('\u{11D08}', '\u{11D09}'),
    ('\u{11D0B}', '\u{11D30}'),
    ('\u{11D46}', '\u{11D46}'),
    ('\u{11D60}', '\u{11D65}'),
    ('\u{11D67}', '\u{11D68}'),
    ('\u{11D6A}', '\u{11D89}'),
    ('\u{11D98}', '\u{11D98}'),
    ('\u{11EE0}', '\u{11EF2}'),
    ('\u{11F02}', '\u{11F02}'),
    ('\u{11F04}', '\u{11F10}'),
    ('\u{11F12}', '\u{11F33}'),
    ('\u{11FB0}', '\u{11FB0}'),
    ('\u{12000}', '\u{12399}'),
    ('\u{12480}', '\u{12543}'),
    ('\u{12F90}', '\u{12FF0}'),
    ('\u{13000}', '\u{1342F}'),
    ('\u{13441}', '\u{13446}'),
    ('\u{14400}', '\u{14646}'),
    ('\u{16800}', '\u{16A38}'),
    ('\u{16A40}', '\u{16A5E}'),
    ('\u{16A70}', '\u{16ABE}'),
    ('\u{16AD0}', '\u{16AED}'),
    ('\u{16B00}', '\u{16B2F}'),
    ('\u{16B40}', '\u{16B43}'),
    ('\u{16B63}', '\u{16B77}'),
    ('\u{16B7D}', '\u{16B8F}'),
    ('\u{16E40}', '\u{16E7F}'),
    ('\u{16F00}', '\u{16F4A}'),
    ('\u{16F50}', '\u{16F50}'),
    ('\u{16F93}', '\u{16F9F}'),
    ('\u{16FE0}', '\u{16FE1}'),
    ('\u{16FE3}', '\u{16FE3}'),
    ('\u{17000}', '\u{187F7}'),
    ('\u{18800}', '\u{18CD5}'),
    ('\u{18D00}', '\u{18D08}'),
    ('\u{1AFF0}', '\u{1AFF3}'),
    ('\u{1AFF5}', '\u{1AFFB}'),
    ('\u{1AFFD}', '\u{1AFFE}'),
    ('\u{1B000}', '\u{1B122}'),
    ('\u{1B132}', '\u{1B132}'),
    ('\u{1B150}', '\u{1B152}'),
    ('\u{1B155}', '\u{1B155}'),
    ('\u{1B164}', '\u{1B167}'),
    ('\u{1B170}', '\u{1B2FB}'),
    ('\u{1BC00}', '\u{1BC6A}'),
    ('\u{1BC70}', '\u{1BC7C}'),
    ('\u{1BC80}', '\u{1BC88}'),
    ('\u{1BC90}', '\u{1BC99}'),
    ('\u{1D400}', '\u{1D454}'),
    ('\u{1D456}', '\u{1D49C}'),
    ('\u{1D49E}', '\u{1D49F}'),
    ('\u{1D4A2}', '\u{1D4A2}'),
    ('\u{1D4A5}', '\u{1D4A6}'),
    ('\u{1D4A9}', '\u{1D4AC}'),
    ('\u{1D4AE}', '\u{1D4B9}'),
    ('\u{1D4BB}', '\u{1D4BB}'),
    ('\u{1D4BD}', '\u{1D4C3}'),
    ('\u{1D4C5}', '\u{1D505}'),
    ('\u{1D507}', '\u{1D50A}'),
    ('\u{1D50D}', '\u{1D514}'),
    ('\u{1D516}', '\u{1D51C}'),
    ('\u{1D51E}', '\u{1D539}'),
    ('\u{1D53B}', '\u{1D53E}'),
    ('\u{1D540}', '\u{1D544}'),
    ('\u{1D546}', '\u{1D546}'),
    ('\u{1D54A}', '\u{1D550}'),
    ('\u{1D552}', '\u{1D6A5}'),
    ('\u{1D6A8}', '\u{1D6C0}'),
    ('\u{1D6C2}', '\u{1D6DA}'),
    ('\u{1D6DC}', '\u{1D6FA}'),
    ('\u{1D6FC}', '\u{1D714}'),
    ('\u{1D716}', '\u{1D734}'),
    ('\u{1D736}', '\u{1D74E}'),
    ('\u{1D750}', '\u{1D76E}'),
    ('\u{1D770}', '\u{1D788}'),
    ('\u{1D78A}', '\u{1D7A8}'),
    ('\u{1D7AA}', '\u{1D7C2}'),
    ('\u{1D7C4}', '\u{1D7CB}'),
    ('\u{1DF00}', '\u{1DF1E}'),
    ('\u{1DF25}', '\u{1DF2A}'),
    ('\u{1E030}', '\u{1E06D}'),
    ('\u{1E100}', '\u{1E12C}'),
    ('\u{1E137}', '\u{1E13D}'),
    ('\u{1E14E}', '\u{1E14E}'),
    ('\u{1E290}', '\u{1E2AD}'),
    ('\u{1E2C0}', '\u{1E2EB}'),
    ('\u{1E4D0}', '\u{1E4EB}'),
    ('\u{1E7E0}', '\u{1E7E6}'),
    ('\u{1E7E8}', '\u{1E7EB}'),
    ('\u{1E7ED}', '\u{1E7EE}'),
    ('\u{1E7F0}', '\u{1E7FE}'),
    ('\u{1E800}', '\u{1E8C4}'),
    ('\u{1E900}', '\u{1E943}'),
    ('\u{1E94B}', '\u{1E94B}'),
    ('\u{1EE00}', '\u{1EE03}'),
    ('\u{1EE05}', '\u{1EE1F}'),
    ('\u{1EE21}', '\u{1EE22}'),
    ('\u{1EE24}', '\u{1EE24}'),
    ('\u{1EE27}', '\u{1EE27}'),
    ('\u{1EE29}', '\u{1EE32}'),
    ('\u{1EE34}', '\u{1EE37}'),
    ('\u{1EE39}', '\u{1EE39}'),
    ('\u{1EE3B}', '\u{1EE3B}'),
    ('\u{1EE42}', '\u{1EE42}'),
    ('\u{1EE47}', '\u{1EE47}'),
    ('\u{1EE49}', '\u{1EE49}'),
    ('\u{1EE4B}', '\u{1EE4B}'),
    ('\u{1EE4D}', '\u{1EE4F}'),
    ('\u{1EE51}', '\u{1EE52}'),
    ('\u{1EE54}', '\u{1EE54}'),
    ('\u{1EE57}', '\u{1EE57}'),
    ('\u{1EE59}', '\u{1EE59}'),
    ('\u{1EE5B}', '\u{1EE5B}'),
    ('\u{1EE5D}', '\u{1EE5D}'),
    ('\u{1EE5F}', '\u{1EE5F}'),
    ('\u{1EE61}', '\u{1EE62}'),
    ('\u{1EE64}', '\u{1EE64}'),
    ('\u{1EE67}', '\u{1EE6A}'),
    ('\u{1EE6C}', '\u{1EE72}'),
    ('\u{1EE74}', '\u{1EE77}'),
    ('\u{1EE79}', '\u{1EE7C}'),
    ('\u{1EE7E}', '\u{1EE7E}'),
    ('\u{1EE80}', '\u{1EE89}'),
    ('\u{1EE8B}', '\u{1EE9B}'),
    ('\u{1EEA1}', '\u{1EEA3}'),
    ('\u{1EEA5}', '\u{1EEA9}'),
    ('\u{1EEAB}', '\u{1EEBB}'),
    ('\u{20000}', '\u{2A6DF}'),
    ('\u{2A700}', '\u{2B739}'),
    ('\u{2B740}', '\u{2B81D}'),
    ('\u{2B820}', '\u{2CEA1}'),
    ('\u{2CEB0}', '\u{2EBE0}'),
    ('\u{2EBF0}', '\u{2EE5D}'),
    ('\u{2F800}', '\u{2FA1D}'),
    ('\u{30000}', '\u{3134A}'),
    ('\u{31350}', '\u{323AF}'),
];
898
/// Inclusive codepoint ranges of characters with the Unicode `Uppercase`
/// property (general category `Lu` plus `Other_Uppercase`, e.g. roman
/// numerals `Ⅻ` and circled letters `Ⓐ`). Mirrors CPython's
/// `Py_UNICODE_ISUPPER`. Titlecase (`Lt`) characters are excluded (they
/// live in [`TITLE_RANGES`]).
///
/// Each entry is `(first, last)` with both ends inclusive; a lone
/// codepoint is stored as `(c, c)`. Entries are sorted ascending and do
/// not overlap, which is the shape [`in_ranges`] binary-searches over.
/// Keep that ordering intact when regenerating the table.
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0,
/// run-length compressed to 651 ranges.
const UPPER_RANGES: &[(char, char)] = &[
    ('\u{0041}', '\u{005A}'),
    ('\u{00C0}', '\u{00D6}'),
    ('\u{00D8}', '\u{00DE}'),
    ('\u{0100}', '\u{0100}'),
    ('\u{0102}', '\u{0102}'),
    ('\u{0104}', '\u{0104}'),
    ('\u{0106}', '\u{0106}'),
    ('\u{0108}', '\u{0108}'),
    ('\u{010A}', '\u{010A}'),
    ('\u{010C}', '\u{010C}'),
    ('\u{010E}', '\u{010E}'),
    ('\u{0110}', '\u{0110}'),
    ('\u{0112}', '\u{0112}'),
    ('\u{0114}', '\u{0114}'),
    ('\u{0116}', '\u{0116}'),
    ('\u{0118}', '\u{0118}'),
    ('\u{011A}', '\u{011A}'),
    ('\u{011C}', '\u{011C}'),
    ('\u{011E}', '\u{011E}'),
    ('\u{0120}', '\u{0120}'),
    ('\u{0122}', '\u{0122}'),
    ('\u{0124}', '\u{0124}'),
    ('\u{0126}', '\u{0126}'),
    ('\u{0128}', '\u{0128}'),
    ('\u{012A}', '\u{012A}'),
    ('\u{012C}', '\u{012C}'),
    ('\u{012E}', '\u{012E}'),
    ('\u{0130}', '\u{0130}'),
    ('\u{0132}', '\u{0132}'),
    ('\u{0134}', '\u{0134}'),
    ('\u{0136}', '\u{0136}'),
    ('\u{0139}', '\u{0139}'),
    ('\u{013B}', '\u{013B}'),
    ('\u{013D}', '\u{013D}'),
    ('\u{013F}', '\u{013F}'),
    ('\u{0141}', '\u{0141}'),
    ('\u{0143}', '\u{0143}'),
    ('\u{0145}', '\u{0145}'),
    ('\u{0147}', '\u{0147}'),
    ('\u{014A}', '\u{014A}'),
    ('\u{014C}', '\u{014C}'),
    ('\u{014E}', '\u{014E}'),
    ('\u{0150}', '\u{0150}'),
    ('\u{0152}', '\u{0152}'),
    ('\u{0154}', '\u{0154}'),
    ('\u{0156}', '\u{0156}'),
    ('\u{0158}', '\u{0158}'),
    ('\u{015A}', '\u{015A}'),
    ('\u{015C}', '\u{015C}'),
    ('\u{015E}', '\u{015E}'),
    ('\u{0160}', '\u{0160}'),
    ('\u{0162}', '\u{0162}'),
    ('\u{0164}', '\u{0164}'),
    ('\u{0166}', '\u{0166}'),
    ('\u{0168}', '\u{0168}'),
    ('\u{016A}', '\u{016A}'),
    ('\u{016C}', '\u{016C}'),
    ('\u{016E}', '\u{016E}'),
    ('\u{0170}', '\u{0170}'),
    ('\u{0172}', '\u{0172}'),
    ('\u{0174}', '\u{0174}'),
    ('\u{0176}', '\u{0176}'),
    ('\u{0178}', '\u{0179}'),
    ('\u{017B}', '\u{017B}'),
    ('\u{017D}', '\u{017D}'),
    ('\u{0181}', '\u{0182}'),
    ('\u{0184}', '\u{0184}'),
    ('\u{0186}', '\u{0187}'),
    ('\u{0189}', '\u{018B}'),
    ('\u{018E}', '\u{0191}'),
    ('\u{0193}', '\u{0194}'),
    ('\u{0196}', '\u{0198}'),
    ('\u{019C}', '\u{019D}'),
    ('\u{019F}', '\u{01A0}'),
    ('\u{01A2}', '\u{01A2}'),
    ('\u{01A4}', '\u{01A4}'),
    ('\u{01A6}', '\u{01A7}'),
    ('\u{01A9}', '\u{01A9}'),
    ('\u{01AC}', '\u{01AC}'),
    ('\u{01AE}', '\u{01AF}'),
    ('\u{01B1}', '\u{01B3}'),
    ('\u{01B5}', '\u{01B5}'),
    ('\u{01B7}', '\u{01B8}'),
    ('\u{01BC}', '\u{01BC}'),
    ('\u{01C4}', '\u{01C4}'),
    ('\u{01C7}', '\u{01C7}'),
    ('\u{01CA}', '\u{01CA}'),
    ('\u{01CD}', '\u{01CD}'),
    ('\u{01CF}', '\u{01CF}'),
    ('\u{01D1}', '\u{01D1}'),
    ('\u{01D3}', '\u{01D3}'),
    ('\u{01D5}', '\u{01D5}'),
    ('\u{01D7}', '\u{01D7}'),
    ('\u{01D9}', '\u{01D9}'),
    ('\u{01DB}', '\u{01DB}'),
    ('\u{01DE}', '\u{01DE}'),
    ('\u{01E0}', '\u{01E0}'),
    ('\u{01E2}', '\u{01E2}'),
    ('\u{01E4}', '\u{01E4}'),
    ('\u{01E6}', '\u{01E6}'),
    ('\u{01E8}', '\u{01E8}'),
    ('\u{01EA}', '\u{01EA}'),
    ('\u{01EC}', '\u{01EC}'),
    ('\u{01EE}', '\u{01EE}'),
    ('\u{01F1}', '\u{01F1}'),
    ('\u{01F4}', '\u{01F4}'),
    ('\u{01F6}', '\u{01F8}'),
    ('\u{01FA}', '\u{01FA}'),
    ('\u{01FC}', '\u{01FC}'),
    ('\u{01FE}', '\u{01FE}'),
    ('\u{0200}', '\u{0200}'),
    ('\u{0202}', '\u{0202}'),
    ('\u{0204}', '\u{0204}'),
    ('\u{0206}', '\u{0206}'),
    ('\u{0208}', '\u{0208}'),
    ('\u{020A}', '\u{020A}'),
    ('\u{020C}', '\u{020C}'),
    ('\u{020E}', '\u{020E}'),
    ('\u{0210}', '\u{0210}'),
    ('\u{0212}', '\u{0212}'),
    ('\u{0214}', '\u{0214}'),
    ('\u{0216}', '\u{0216}'),
    ('\u{0218}', '\u{0218}'),
    ('\u{021A}', '\u{021A}'),
    ('\u{021C}', '\u{021C}'),
    ('\u{021E}', '\u{021E}'),
    ('\u{0220}', '\u{0220}'),
    ('\u{0222}', '\u{0222}'),
    ('\u{0224}', '\u{0224}'),
    ('\u{0226}', '\u{0226}'),
    ('\u{0228}', '\u{0228}'),
    ('\u{022A}', '\u{022A}'),
    ('\u{022C}', '\u{022C}'),
    ('\u{022E}', '\u{022E}'),
    ('\u{0230}', '\u{0230}'),
    ('\u{0232}', '\u{0232}'),
    ('\u{023A}', '\u{023B}'),
    ('\u{023D}', '\u{023E}'),
    ('\u{0241}', '\u{0241}'),
    ('\u{0243}', '\u{0246}'),
    ('\u{0248}', '\u{0248}'),
    ('\u{024A}', '\u{024A}'),
    ('\u{024C}', '\u{024C}'),
    ('\u{024E}', '\u{024E}'),
    ('\u{0370}', '\u{0370}'),
    ('\u{0372}', '\u{0372}'),
    ('\u{0376}', '\u{0376}'),
    ('\u{037F}', '\u{037F}'),
    ('\u{0386}', '\u{0386}'),
    ('\u{0388}', '\u{038A}'),
    ('\u{038C}', '\u{038C}'),
    ('\u{038E}', '\u{038F}'),
    ('\u{0391}', '\u{03A1}'),
    ('\u{03A3}', '\u{03AB}'),
    ('\u{03CF}', '\u{03CF}'),
    ('\u{03D2}', '\u{03D4}'),
    ('\u{03D8}', '\u{03D8}'),
    ('\u{03DA}', '\u{03DA}'),
    ('\u{03DC}', '\u{03DC}'),
    ('\u{03DE}', '\u{03DE}'),
    ('\u{03E0}', '\u{03E0}'),
    ('\u{03E2}', '\u{03E2}'),
    ('\u{03E4}', '\u{03E4}'),
    ('\u{03E6}', '\u{03E6}'),
    ('\u{03E8}', '\u{03E8}'),
    ('\u{03EA}', '\u{03EA}'),
    ('\u{03EC}', '\u{03EC}'),
    ('\u{03EE}', '\u{03EE}'),
    ('\u{03F4}', '\u{03F4}'),
    ('\u{03F7}', '\u{03F7}'),
    ('\u{03F9}', '\u{03FA}'),
    ('\u{03FD}', '\u{042F}'),
    ('\u{0460}', '\u{0460}'),
    ('\u{0462}', '\u{0462}'),
    ('\u{0464}', '\u{0464}'),
    ('\u{0466}', '\u{0466}'),
    ('\u{0468}', '\u{0468}'),
    ('\u{046A}', '\u{046A}'),
    ('\u{046C}', '\u{046C}'),
    ('\u{046E}', '\u{046E}'),
    ('\u{0470}', '\u{0470}'),
    ('\u{0472}', '\u{0472}'),
    ('\u{0474}', '\u{0474}'),
    ('\u{0476}', '\u{0476}'),
    ('\u{0478}', '\u{0478}'),
    ('\u{047A}', '\u{047A}'),
    ('\u{047C}', '\u{047C}'),
    ('\u{047E}', '\u{047E}'),
    ('\u{0480}', '\u{0480}'),
    ('\u{048A}', '\u{048A}'),
    ('\u{048C}', '\u{048C}'),
    ('\u{048E}', '\u{048E}'),
    ('\u{0490}', '\u{0490}'),
    ('\u{0492}', '\u{0492}'),
    ('\u{0494}', '\u{0494}'),
    ('\u{0496}', '\u{0496}'),
    ('\u{0498}', '\u{0498}'),
    ('\u{049A}', '\u{049A}'),
    ('\u{049C}', '\u{049C}'),
    ('\u{049E}', '\u{049E}'),
    ('\u{04A0}', '\u{04A0}'),
    ('\u{04A2}', '\u{04A2}'),
    ('\u{04A4}', '\u{04A4}'),
    ('\u{04A6}', '\u{04A6}'),
    ('\u{04A8}', '\u{04A8}'),
    ('\u{04AA}', '\u{04AA}'),
    ('\u{04AC}', '\u{04AC}'),
    ('\u{04AE}', '\u{04AE}'),
    ('\u{04B0}', '\u{04B0}'),
    ('\u{04B2}', '\u{04B2}'),
    ('\u{04B4}', '\u{04B4}'),
    ('\u{04B6}', '\u{04B6}'),
    ('\u{04B8}', '\u{04B8}'),
    ('\u{04BA}', '\u{04BA}'),
    ('\u{04BC}', '\u{04BC}'),
    ('\u{04BE}', '\u{04BE}'),
    ('\u{04C0}', '\u{04C1}'),
    ('\u{04C3}', '\u{04C3}'),
    ('\u{04C5}', '\u{04C5}'),
    ('\u{04C7}', '\u{04C7}'),
    ('\u{04C9}', '\u{04C9}'),
    ('\u{04CB}', '\u{04CB}'),
    ('\u{04CD}', '\u{04CD}'),
    ('\u{04D0}', '\u{04D0}'),
    ('\u{04D2}', '\u{04D2}'),
    ('\u{04D4}', '\u{04D4}'),
    ('\u{04D6}', '\u{04D6}'),
    ('\u{04D8}', '\u{04D8}'),
    ('\u{04DA}', '\u{04DA}'),
    ('\u{04DC}', '\u{04DC}'),
    ('\u{04DE}', '\u{04DE}'),
    ('\u{04E0}', '\u{04E0}'),
    ('\u{04E2}', '\u{04E2}'),
    ('\u{04E4}', '\u{04E4}'),
    ('\u{04E6}', '\u{04E6}'),
    ('\u{04E8}', '\u{04E8}'),
    ('\u{04EA}', '\u{04EA}'),
    ('\u{04EC}', '\u{04EC}'),
    ('\u{04EE}', '\u{04EE}'),
    ('\u{04F0}', '\u{04F0}'),
    ('\u{04F2}', '\u{04F2}'),
    ('\u{04F4}', '\u{04F4}'),
    ('\u{04F6}', '\u{04F6}'),
    ('\u{04F8}', '\u{04F8}'),
    ('\u{04FA}', '\u{04FA}'),
    ('\u{04FC}', '\u{04FC}'),
    ('\u{04FE}', '\u{04FE}'),
    ('\u{0500}', '\u{0500}'),
    ('\u{0502}', '\u{0502}'),
    ('\u{0504}', '\u{0504}'),
    ('\u{0506}', '\u{0506}'),
    ('\u{0508}', '\u{0508}'),
    ('\u{050A}', '\u{050A}'),
    ('\u{050C}', '\u{050C}'),
    ('\u{050E}', '\u{050E}'),
    ('\u{0510}', '\u{0510}'),
    ('\u{0512}', '\u{0512}'),
    ('\u{0514}', '\u{0514}'),
    ('\u{0516}', '\u{0516}'),
    ('\u{0518}', '\u{0518}'),
    ('\u{051A}', '\u{051A}'),
    ('\u{051C}', '\u{051C}'),
    ('\u{051E}', '\u{051E}'),
    ('\u{0520}', '\u{0520}'),
    ('\u{0522}', '\u{0522}'),
    ('\u{0524}', '\u{0524}'),
    ('\u{0526}', '\u{0526}'),
    ('\u{0528}', '\u{0528}'),
    ('\u{052A}', '\u{052A}'),
    ('\u{052C}', '\u{052C}'),
    ('\u{052E}', '\u{052E}'),
    ('\u{0531}', '\u{0556}'),
    ('\u{10A0}', '\u{10C5}'),
    ('\u{10C7}', '\u{10C7}'),
    ('\u{10CD}', '\u{10CD}'),
    ('\u{13A0}', '\u{13F5}'),
    ('\u{1C90}', '\u{1CBA}'),
    ('\u{1CBD}', '\u{1CBF}'),
    ('\u{1E00}', '\u{1E00}'),
    ('\u{1E02}', '\u{1E02}'),
    ('\u{1E04}', '\u{1E04}'),
    ('\u{1E06}', '\u{1E06}'),
    ('\u{1E08}', '\u{1E08}'),
    ('\u{1E0A}', '\u{1E0A}'),
    ('\u{1E0C}', '\u{1E0C}'),
    ('\u{1E0E}', '\u{1E0E}'),
    ('\u{1E10}', '\u{1E10}'),
    ('\u{1E12}', '\u{1E12}'),
    ('\u{1E14}', '\u{1E14}'),
    ('\u{1E16}', '\u{1E16}'),
    ('\u{1E18}', '\u{1E18}'),
    ('\u{1E1A}', '\u{1E1A}'),
    ('\u{1E1C}', '\u{1E1C}'),
    ('\u{1E1E}', '\u{1E1E}'),
    ('\u{1E20}', '\u{1E20}'),
    ('\u{1E22}', '\u{1E22}'),
    ('\u{1E24}', '\u{1E24}'),
    ('\u{1E26}', '\u{1E26}'),
    ('\u{1E28}', '\u{1E28}'),
    ('\u{1E2A}', '\u{1E2A}'),
    ('\u{1E2C}', '\u{1E2C}'),
    ('\u{1E2E}', '\u{1E2E}'),
    ('\u{1E30}', '\u{1E30}'),
    ('\u{1E32}', '\u{1E32}'),
    ('\u{1E34}', '\u{1E34}'),
    ('\u{1E36}', '\u{1E36}'),
    ('\u{1E38}', '\u{1E38}'),
    ('\u{1E3A}', '\u{1E3A}'),
    ('\u{1E3C}', '\u{1E3C}'),
    ('\u{1E3E}', '\u{1E3E}'),
    ('\u{1E40}', '\u{1E40}'),
    ('\u{1E42}', '\u{1E42}'),
    ('\u{1E44}', '\u{1E44}'),
    ('\u{1E46}', '\u{1E46}'),
    ('\u{1E48}', '\u{1E48}'),
    ('\u{1E4A}', '\u{1E4A}'),
    ('\u{1E4C}', '\u{1E4C}'),
    ('\u{1E4E}', '\u{1E4E}'),
    ('\u{1E50}', '\u{1E50}'),
    ('\u{1E52}', '\u{1E52}'),
    ('\u{1E54}', '\u{1E54}'),
    ('\u{1E56}', '\u{1E56}'),
    ('\u{1E58}', '\u{1E58}'),
    ('\u{1E5A}', '\u{1E5A}'),
    ('\u{1E5C}', '\u{1E5C}'),
    ('\u{1E5E}', '\u{1E5E}'),
    ('\u{1E60}', '\u{1E60}'),
    ('\u{1E62}', '\u{1E62}'),
    ('\u{1E64}', '\u{1E64}'),
    ('\u{1E66}', '\u{1E66}'),
    ('\u{1E68}', '\u{1E68}'),
    ('\u{1E6A}', '\u{1E6A}'),
    ('\u{1E6C}', '\u{1E6C}'),
    ('\u{1E6E}', '\u{1E6E}'),
    ('\u{1E70}', '\u{1E70}'),
    ('\u{1E72}', '\u{1E72}'),
    ('\u{1E74}', '\u{1E74}'),
    ('\u{1E76}', '\u{1E76}'),
    ('\u{1E78}', '\u{1E78}'),
    ('\u{1E7A}', '\u{1E7A}'),
    ('\u{1E7C}', '\u{1E7C}'),
    ('\u{1E7E}', '\u{1E7E}'),
    ('\u{1E80}', '\u{1E80}'),
    ('\u{1E82}', '\u{1E82}'),
    ('\u{1E84}', '\u{1E84}'),
    ('\u{1E86}', '\u{1E86}'),
    ('\u{1E88}', '\u{1E88}'),
    ('\u{1E8A}', '\u{1E8A}'),
    ('\u{1E8C}', '\u{1E8C}'),
    ('\u{1E8E}', '\u{1E8E}'),
    ('\u{1E90}', '\u{1E90}'),
    ('\u{1E92}', '\u{1E92}'),
    ('\u{1E94}', '\u{1E94}'),
    ('\u{1E9E}', '\u{1E9E}'),
    ('\u{1EA0}', '\u{1EA0}'),
    ('\u{1EA2}', '\u{1EA2}'),
    ('\u{1EA4}', '\u{1EA4}'),
    ('\u{1EA6}', '\u{1EA6}'),
    ('\u{1EA8}', '\u{1EA8}'),
    ('\u{1EAA}', '\u{1EAA}'),
    ('\u{1EAC}', '\u{1EAC}'),
    ('\u{1EAE}', '\u{1EAE}'),
    ('\u{1EB0}', '\u{1EB0}'),
    ('\u{1EB2}', '\u{1EB2}'),
    ('\u{1EB4}', '\u{1EB4}'),
    ('\u{1EB6}', '\u{1EB6}'),
    ('\u{1EB8}', '\u{1EB8}'),
    ('\u{1EBA}', '\u{1EBA}'),
    ('\u{1EBC}', '\u{1EBC}'),
    ('\u{1EBE}', '\u{1EBE}'),
    ('\u{1EC0}', '\u{1EC0}'),
    ('\u{1EC2}', '\u{1EC2}'),
    ('\u{1EC4}', '\u{1EC4}'),
    ('\u{1EC6}', '\u{1EC6}'),
    ('\u{1EC8}', '\u{1EC8}'),
    ('\u{1ECA}', '\u{1ECA}'),
    ('\u{1ECC}', '\u{1ECC}'),
    ('\u{1ECE}', '\u{1ECE}'),
    ('\u{1ED0}', '\u{1ED0}'),
    ('\u{1ED2}', '\u{1ED2}'),
    ('\u{1ED4}', '\u{1ED4}'),
    ('\u{1ED6}', '\u{1ED6}'),
    ('\u{1ED8}', '\u{1ED8}'),
    ('\u{1EDA}', '\u{1EDA}'),
    ('\u{1EDC}', '\u{1EDC}'),
    ('\u{1EDE}', '\u{1EDE}'),
    ('\u{1EE0}', '\u{1EE0}'),
    ('\u{1EE2}', '\u{1EE2}'),
    ('\u{1EE4}', '\u{1EE4}'),
    ('\u{1EE6}', '\u{1EE6}'),
    ('\u{1EE8}', '\u{1EE8}'),
    ('\u{1EEA}', '\u{1EEA}'),
    ('\u{1EEC}', '\u{1EEC}'),
    ('\u{1EEE}', '\u{1EEE}'),
    ('\u{1EF0}', '\u{1EF0}'),
    ('\u{1EF2}', '\u{1EF2}'),
    ('\u{1EF4}', '\u{1EF4}'),
    ('\u{1EF6}', '\u{1EF6}'),
    ('\u{1EF8}', '\u{1EF8}'),
    ('\u{1EFA}', '\u{1EFA}'),
    ('\u{1EFC}', '\u{1EFC}'),
    ('\u{1EFE}', '\u{1EFE}'),
    ('\u{1F08}', '\u{1F0F}'),
    ('\u{1F18}', '\u{1F1D}'),
    ('\u{1F28}', '\u{1F2F}'),
    ('\u{1F38}', '\u{1F3F}'),
    ('\u{1F48}', '\u{1F4D}'),
    ('\u{1F59}', '\u{1F59}'),
    ('\u{1F5B}', '\u{1F5B}'),
    ('\u{1F5D}', '\u{1F5D}'),
    ('\u{1F5F}', '\u{1F5F}'),
    ('\u{1F68}', '\u{1F6F}'),
    ('\u{1FB8}', '\u{1FBB}'),
    ('\u{1FC8}', '\u{1FCB}'),
    ('\u{1FD8}', '\u{1FDB}'),
    ('\u{1FE8}', '\u{1FEC}'),
    ('\u{1FF8}', '\u{1FFB}'),
    ('\u{2102}', '\u{2102}'),
    ('\u{2107}', '\u{2107}'),
    ('\u{210B}', '\u{210D}'),
    ('\u{2110}', '\u{2112}'),
    ('\u{2115}', '\u{2115}'),
    ('\u{2119}', '\u{211D}'),
    ('\u{2124}', '\u{2124}'),
    ('\u{2126}', '\u{2126}'),
    ('\u{2128}', '\u{2128}'),
    ('\u{212A}', '\u{212D}'),
    ('\u{2130}', '\u{2133}'),
    ('\u{213E}', '\u{213F}'),
    ('\u{2145}', '\u{2145}'),
    ('\u{2160}', '\u{216F}'),
    ('\u{2183}', '\u{2183}'),
    ('\u{24B6}', '\u{24CF}'),
    ('\u{2C00}', '\u{2C2F}'),
    ('\u{2C60}', '\u{2C60}'),
    ('\u{2C62}', '\u{2C64}'),
    ('\u{2C67}', '\u{2C67}'),
    ('\u{2C69}', '\u{2C69}'),
    ('\u{2C6B}', '\u{2C6B}'),
    ('\u{2C6D}', '\u{2C70}'),
    ('\u{2C72}', '\u{2C72}'),
    ('\u{2C75}', '\u{2C75}'),
    ('\u{2C7E}', '\u{2C80}'),
    ('\u{2C82}', '\u{2C82}'),
    ('\u{2C84}', '\u{2C84}'),
    ('\u{2C86}', '\u{2C86}'),
    ('\u{2C88}', '\u{2C88}'),
    ('\u{2C8A}', '\u{2C8A}'),
    ('\u{2C8C}', '\u{2C8C}'),
    ('\u{2C8E}', '\u{2C8E}'),
    ('\u{2C90}', '\u{2C90}'),
    ('\u{2C92}', '\u{2C92}'),
    ('\u{2C94}', '\u{2C94}'),
    ('\u{2C96}', '\u{2C96}'),
    ('\u{2C98}', '\u{2C98}'),
    ('\u{2C9A}', '\u{2C9A}'),
    ('\u{2C9C}', '\u{2C9C}'),
    ('\u{2C9E}', '\u{2C9E}'),
    ('\u{2CA0}', '\u{2CA0}'),
    ('\u{2CA2}', '\u{2CA2}'),
    ('\u{2CA4}', '\u{2CA4}'),
    ('\u{2CA6}', '\u{2CA6}'),
    ('\u{2CA8}', '\u{2CA8}'),
    ('\u{2CAA}', '\u{2CAA}'),
    ('\u{2CAC}', '\u{2CAC}'),
    ('\u{2CAE}', '\u{2CAE}'),
    ('\u{2CB0}', '\u{2CB0}'),
    ('\u{2CB2}', '\u{2CB2}'),
    ('\u{2CB4}', '\u{2CB4}'),
    ('\u{2CB6}', '\u{2CB6}'),
    ('\u{2CB8}', '\u{2CB8}'),
    ('\u{2CBA}', '\u{2CBA}'),
    ('\u{2CBC}', '\u{2CBC}'),
    ('\u{2CBE}', '\u{2CBE}'),
    ('\u{2CC0}', '\u{2CC0}'),
    ('\u{2CC2}', '\u{2CC2}'),
    ('\u{2CC4}', '\u{2CC4}'),
    ('\u{2CC6}', '\u{2CC6}'),
    ('\u{2CC8}', '\u{2CC8}'),
    ('\u{2CCA}', '\u{2CCA}'),
    ('\u{2CCC}', '\u{2CCC}'),
    ('\u{2CCE}', '\u{2CCE}'),
    ('\u{2CD0}', '\u{2CD0}'),
    ('\u{2CD2}', '\u{2CD2}'),
    ('\u{2CD4}', '\u{2CD4}'),
    ('\u{2CD6}', '\u{2CD6}'),
    ('\u{2CD8}', '\u{2CD8}'),
    ('\u{2CDA}', '\u{2CDA}'),
    ('\u{2CDC}', '\u{2CDC}'),
    ('\u{2CDE}', '\u{2CDE}'),
    ('\u{2CE0}', '\u{2CE0}'),
    ('\u{2CE2}', '\u{2CE2}'),
    ('\u{2CEB}', '\u{2CEB}'),
    ('\u{2CED}', '\u{2CED}'),
    ('\u{2CF2}', '\u{2CF2}'),
    ('\u{A640}', '\u{A640}'),
    ('\u{A642}', '\u{A642}'),
    ('\u{A644}', '\u{A644}'),
    ('\u{A646}', '\u{A646}'),
    ('\u{A648}', '\u{A648}'),
    ('\u{A64A}', '\u{A64A}'),
    ('\u{A64C}', '\u{A64C}'),
    ('\u{A64E}', '\u{A64E}'),
    ('\u{A650}', '\u{A650}'),
    ('\u{A652}', '\u{A652}'),
    ('\u{A654}', '\u{A654}'),
    ('\u{A656}', '\u{A656}'),
    ('\u{A658}', '\u{A658}'),
    ('\u{A65A}', '\u{A65A}'),
    ('\u{A65C}', '\u{A65C}'),
    ('\u{A65E}', '\u{A65E}'),
    ('\u{A660}', '\u{A660}'),
    ('\u{A662}', '\u{A662}'),
    ('\u{A664}', '\u{A664}'),
    ('\u{A666}', '\u{A666}'),
    ('\u{A668}', '\u{A668}'),
    ('\u{A66A}', '\u{A66A}'),
    ('\u{A66C}', '\u{A66C}'),
    ('\u{A680}', '\u{A680}'),
    ('\u{A682}', '\u{A682}'),
    ('\u{A684}', '\u{A684}'),
    ('\u{A686}', '\u{A686}'),
    ('\u{A688}', '\u{A688}'),
    ('\u{A68A}', '\u{A68A}'),
    ('\u{A68C}', '\u{A68C}'),
    ('\u{A68E}', '\u{A68E}'),
    ('\u{A690}', '\u{A690}'),
    ('\u{A692}', '\u{A692}'),
    ('\u{A694}', '\u{A694}'),
    ('\u{A696}', '\u{A696}'),
    ('\u{A698}', '\u{A698}'),
    ('\u{A69A}', '\u{A69A}'),
    ('\u{A722}', '\u{A722}'),
    ('\u{A724}', '\u{A724}'),
    ('\u{A726}', '\u{A726}'),
    ('\u{A728}', '\u{A728}'),
    ('\u{A72A}', '\u{A72A}'),
    ('\u{A72C}', '\u{A72C}'),
    ('\u{A72E}', '\u{A72E}'),
    ('\u{A732}', '\u{A732}'),
    ('\u{A734}', '\u{A734}'),
    ('\u{A736}', '\u{A736}'),
    ('\u{A738}', '\u{A738}'),
    ('\u{A73A}', '\u{A73A}'),
    ('\u{A73C}', '\u{A73C}'),
    ('\u{A73E}', '\u{A73E}'),
    ('\u{A740}', '\u{A740}'),
    ('\u{A742}', '\u{A742}'),
    ('\u{A744}', '\u{A744}'),
    ('\u{A746}', '\u{A746}'),
    ('\u{A748}', '\u{A748}'),
    ('\u{A74A}', '\u{A74A}'),
    ('\u{A74C}', '\u{A74C}'),
    ('\u{A74E}', '\u{A74E}'),
    ('\u{A750}', '\u{A750}'),
    ('\u{A752}', '\u{A752}'),
    ('\u{A754}', '\u{A754}'),
    ('\u{A756}', '\u{A756}'),
    ('\u{A758}', '\u{A758}'),
    ('\u{A75A}', '\u{A75A}'),
    ('\u{A75C}', '\u{A75C}'),
    ('\u{A75E}', '\u{A75E}'),
    ('\u{A760}', '\u{A760}'),
    ('\u{A762}', '\u{A762}'),
    ('\u{A764}', '\u{A764}'),
    ('\u{A766}', '\u{A766}'),
    ('\u{A768}', '\u{A768}'),
    ('\u{A76A}', '\u{A76A}'),
    ('\u{A76C}', '\u{A76C}'),
    ('\u{A76E}', '\u{A76E}'),
    ('\u{A779}', '\u{A779}'),
    ('\u{A77B}', '\u{A77B}'),
    ('\u{A77D}', '\u{A77E}'),
    ('\u{A780}', '\u{A780}'),
    ('\u{A782}', '\u{A782}'),
    ('\u{A784}', '\u{A784}'),
    ('\u{A786}', '\u{A786}'),
    ('\u{A78B}', '\u{A78B}'),
    ('\u{A78D}', '\u{A78D}'),
    ('\u{A790}', '\u{A790}'),
    ('\u{A792}', '\u{A792}'),
    ('\u{A796}', '\u{A796}'),
    ('\u{A798}', '\u{A798}'),
    ('\u{A79A}', '\u{A79A}'),
    ('\u{A79C}', '\u{A79C}'),
    ('\u{A79E}', '\u{A79E}'),
    ('\u{A7A0}', '\u{A7A0}'),
    ('\u{A7A2}', '\u{A7A2}'),
    ('\u{A7A4}', '\u{A7A4}'),
    ('\u{A7A6}', '\u{A7A6}'),
    ('\u{A7A8}', '\u{A7A8}'),
    ('\u{A7AA}', '\u{A7AE}'),
    ('\u{A7B0}', '\u{A7B4}'),
    ('\u{A7B6}', '\u{A7B6}'),
    ('\u{A7B8}', '\u{A7B8}'),
    ('\u{A7BA}', '\u{A7BA}'),
    ('\u{A7BC}', '\u{A7BC}'),
    ('\u{A7BE}', '\u{A7BE}'),
    ('\u{A7C0}', '\u{A7C0}'),
    ('\u{A7C2}', '\u{A7C2}'),
    ('\u{A7C4}', '\u{A7C7}'),
    ('\u{A7C9}', '\u{A7C9}'),
    ('\u{A7D0}', '\u{A7D0}'),
    ('\u{A7D6}', '\u{A7D6}'),
    ('\u{A7D8}', '\u{A7D8}'),
    ('\u{A7F5}', '\u{A7F5}'),
    ('\u{FF21}', '\u{FF3A}'),
    ('\u{10400}', '\u{10427}'),
    ('\u{104B0}', '\u{104D3}'),
    ('\u{10570}', '\u{1057A}'),
    ('\u{1057C}', '\u{1058A}'),
    ('\u{1058C}', '\u{10592}'),
    ('\u{10594}', '\u{10595}'),
    ('\u{10C80}', '\u{10CB2}'),
    ('\u{118A0}', '\u{118BF}'),
    ('\u{16E40}', '\u{16E5F}'),
    ('\u{1D400}', '\u{1D419}'),
    ('\u{1D434}', '\u{1D44D}'),
    ('\u{1D468}', '\u{1D481}'),
    ('\u{1D49C}', '\u{1D49C}'),
    ('\u{1D49E}', '\u{1D49F}'),
    ('\u{1D4A2}', '\u{1D4A2}'),
    ('\u{1D4A5}', '\u{1D4A6}'),
    ('\u{1D4A9}', '\u{1D4AC}'),
    ('\u{1D4AE}', '\u{1D4B5}'),
    ('\u{1D4D0}', '\u{1D4E9}'),
    ('\u{1D504}', '\u{1D505}'),
    ('\u{1D507}', '\u{1D50A}'),
    ('\u{1D50D}', '\u{1D514}'),
    ('\u{1D516}', '\u{1D51C}'),
    ('\u{1D538}', '\u{1D539}'),
    ('\u{1D53B}', '\u{1D53E}'),
    ('\u{1D540}', '\u{1D544}'),
    ('\u{1D546}', '\u{1D546}'),
    ('\u{1D54A}', '\u{1D550}'),
    ('\u{1D56C}', '\u{1D585}'),
    ('\u{1D5A0}', '\u{1D5B9}'),
    ('\u{1D5D4}', '\u{1D5ED}'),
    ('\u{1D608}', '\u{1D621}'),
    ('\u{1D63C}', '\u{1D655}'),
    ('\u{1D670}', '\u{1D689}'),
    ('\u{1D6A8}', '\u{1D6C0}'),
    ('\u{1D6E2}', '\u{1D6FA}'),
    ('\u{1D71C}', '\u{1D734}'),
    ('\u{1D756}', '\u{1D76E}'),
    ('\u{1D790}', '\u{1D7A8}'),
    ('\u{1D7CA}', '\u{1D7CA}'),
    ('\u{1E900}', '\u{1E921}'),
    ('\u{1F130}', '\u{1F149}'),
    ('\u{1F150}', '\u{1F169}'),
    ('\u{1F170}', '\u{1F189}'),
];
1560
1561/// Inclusive codepoint ranges of characters with the Unicode `Lowercase`
1562/// property (general category `Ll` plus `Other_Lowercase`, e.g. roman
1563/// numerals `ⅻ`). Mirrors CPython's `Py_UNICODE_ISLOWER`.
1564///
1565/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0,
1566/// run-length compressed to 671 ranges.
1567const LOWER_RANGES: &[(char, char)] = &[
1568    ('\u{0061}', '\u{007A}'),
1569    ('\u{00AA}', '\u{00AA}'),
1570    ('\u{00B5}', '\u{00B5}'),
1571    ('\u{00BA}', '\u{00BA}'),
1572    ('\u{00DF}', '\u{00F6}'),
1573    ('\u{00F8}', '\u{00FF}'),
1574    ('\u{0101}', '\u{0101}'),
1575    ('\u{0103}', '\u{0103}'),
1576    ('\u{0105}', '\u{0105}'),
1577    ('\u{0107}', '\u{0107}'),
1578    ('\u{0109}', '\u{0109}'),
1579    ('\u{010B}', '\u{010B}'),
1580    ('\u{010D}', '\u{010D}'),
1581    ('\u{010F}', '\u{010F}'),
1582    ('\u{0111}', '\u{0111}'),
1583    ('\u{0113}', '\u{0113}'),
1584    ('\u{0115}', '\u{0115}'),
1585    ('\u{0117}', '\u{0117}'),
1586    ('\u{0119}', '\u{0119}'),
1587    ('\u{011B}', '\u{011B}'),
1588    ('\u{011D}', '\u{011D}'),
1589    ('\u{011F}', '\u{011F}'),
1590    ('\u{0121}', '\u{0121}'),
1591    ('\u{0123}', '\u{0123}'),
1592    ('\u{0125}', '\u{0125}'),
1593    ('\u{0127}', '\u{0127}'),
1594    ('\u{0129}', '\u{0129}'),
1595    ('\u{012B}', '\u{012B}'),
1596    ('\u{012D}', '\u{012D}'),
1597    ('\u{012F}', '\u{012F}'),
1598    ('\u{0131}', '\u{0131}'),
1599    ('\u{0133}', '\u{0133}'),
1600    ('\u{0135}', '\u{0135}'),
1601    ('\u{0137}', '\u{0138}'),
1602    ('\u{013A}', '\u{013A}'),
1603    ('\u{013C}', '\u{013C}'),
1604    ('\u{013E}', '\u{013E}'),
1605    ('\u{0140}', '\u{0140}'),
1606    ('\u{0142}', '\u{0142}'),
1607    ('\u{0144}', '\u{0144}'),
1608    ('\u{0146}', '\u{0146}'),
1609    ('\u{0148}', '\u{0149}'),
1610    ('\u{014B}', '\u{014B}'),
1611    ('\u{014D}', '\u{014D}'),
1612    ('\u{014F}', '\u{014F}'),
1613    ('\u{0151}', '\u{0151}'),
1614    ('\u{0153}', '\u{0153}'),
1615    ('\u{0155}', '\u{0155}'),
1616    ('\u{0157}', '\u{0157}'),
1617    ('\u{0159}', '\u{0159}'),
1618    ('\u{015B}', '\u{015B}'),
1619    ('\u{015D}', '\u{015D}'),
1620    ('\u{015F}', '\u{015F}'),
1621    ('\u{0161}', '\u{0161}'),
1622    ('\u{0163}', '\u{0163}'),
1623    ('\u{0165}', '\u{0165}'),
1624    ('\u{0167}', '\u{0167}'),
1625    ('\u{0169}', '\u{0169}'),
1626    ('\u{016B}', '\u{016B}'),
1627    ('\u{016D}', '\u{016D}'),
1628    ('\u{016F}', '\u{016F}'),
1629    ('\u{0171}', '\u{0171}'),
1630    ('\u{0173}', '\u{0173}'),
1631    ('\u{0175}', '\u{0175}'),
1632    ('\u{0177}', '\u{0177}'),
1633    ('\u{017A}', '\u{017A}'),
1634    ('\u{017C}', '\u{017C}'),
1635    ('\u{017E}', '\u{0180}'),
1636    ('\u{0183}', '\u{0183}'),
1637    ('\u{0185}', '\u{0185}'),
1638    ('\u{0188}', '\u{0188}'),
1639    ('\u{018C}', '\u{018D}'),
1640    ('\u{0192}', '\u{0192}'),
1641    ('\u{0195}', '\u{0195}'),
1642    ('\u{0199}', '\u{019B}'),
1643    ('\u{019E}', '\u{019E}'),
1644    ('\u{01A1}', '\u{01A1}'),
1645    ('\u{01A3}', '\u{01A3}'),
1646    ('\u{01A5}', '\u{01A5}'),
1647    ('\u{01A8}', '\u{01A8}'),
1648    ('\u{01AA}', '\u{01AB}'),
1649    ('\u{01AD}', '\u{01AD}'),
1650    ('\u{01B0}', '\u{01B0}'),
1651    ('\u{01B4}', '\u{01B4}'),
1652    ('\u{01B6}', '\u{01B6}'),
1653    ('\u{01B9}', '\u{01BA}'),
1654    ('\u{01BD}', '\u{01BF}'),
1655    ('\u{01C6}', '\u{01C6}'),
1656    ('\u{01C9}', '\u{01C9}'),
1657    ('\u{01CC}', '\u{01CC}'),
1658    ('\u{01CE}', '\u{01CE}'),
1659    ('\u{01D0}', '\u{01D0}'),
1660    ('\u{01D2}', '\u{01D2}'),
1661    ('\u{01D4}', '\u{01D4}'),
1662    ('\u{01D6}', '\u{01D6}'),
1663    ('\u{01D8}', '\u{01D8}'),
1664    ('\u{01DA}', '\u{01DA}'),
1665    ('\u{01DC}', '\u{01DD}'),
1666    ('\u{01DF}', '\u{01DF}'),
1667    ('\u{01E1}', '\u{01E1}'),
1668    ('\u{01E3}', '\u{01E3}'),
1669    ('\u{01E5}', '\u{01E5}'),
1670    ('\u{01E7}', '\u{01E7}'),
1671    ('\u{01E9}', '\u{01E9}'),
1672    ('\u{01EB}', '\u{01EB}'),
1673    ('\u{01ED}', '\u{01ED}'),
1674    ('\u{01EF}', '\u{01F0}'),
1675    ('\u{01F3}', '\u{01F3}'),
1676    ('\u{01F5}', '\u{01F5}'),
1677    ('\u{01F9}', '\u{01F9}'),
1678    ('\u{01FB}', '\u{01FB}'),
1679    ('\u{01FD}', '\u{01FD}'),
1680    ('\u{01FF}', '\u{01FF}'),
1681    ('\u{0201}', '\u{0201}'),
1682    ('\u{0203}', '\u{0203}'),
1683    ('\u{0205}', '\u{0205}'),
1684    ('\u{0207}', '\u{0207}'),
1685    ('\u{0209}', '\u{0209}'),
1686    ('\u{020B}', '\u{020B}'),
1687    ('\u{020D}', '\u{020D}'),
1688    ('\u{020F}', '\u{020F}'),
1689    ('\u{0211}', '\u{0211}'),
1690    ('\u{0213}', '\u{0213}'),
1691    ('\u{0215}', '\u{0215}'),
1692    ('\u{0217}', '\u{0217}'),
1693    ('\u{0219}', '\u{0219}'),
1694    ('\u{021B}', '\u{021B}'),
1695    ('\u{021D}', '\u{021D}'),
1696    ('\u{021F}', '\u{021F}'),
1697    ('\u{0221}', '\u{0221}'),
1698    ('\u{0223}', '\u{0223}'),
1699    ('\u{0225}', '\u{0225}'),
1700    ('\u{0227}', '\u{0227}'),
1701    ('\u{0229}', '\u{0229}'),
1702    ('\u{022B}', '\u{022B}'),
1703    ('\u{022D}', '\u{022D}'),
1704    ('\u{022F}', '\u{022F}'),
1705    ('\u{0231}', '\u{0231}'),
1706    ('\u{0233}', '\u{0239}'),
1707    ('\u{023C}', '\u{023C}'),
1708    ('\u{023F}', '\u{0240}'),
1709    ('\u{0242}', '\u{0242}'),
1710    ('\u{0247}', '\u{0247}'),
1711    ('\u{0249}', '\u{0249}'),
1712    ('\u{024B}', '\u{024B}'),
1713    ('\u{024D}', '\u{024D}'),
1714    ('\u{024F}', '\u{0293}'),
1715    ('\u{0295}', '\u{02B8}'),
1716    ('\u{02C0}', '\u{02C1}'),
1717    ('\u{02E0}', '\u{02E4}'),
1718    ('\u{0345}', '\u{0345}'),
1719    ('\u{0371}', '\u{0371}'),
1720    ('\u{0373}', '\u{0373}'),
1721    ('\u{0377}', '\u{0377}'),
1722    ('\u{037A}', '\u{037D}'),
1723    ('\u{0390}', '\u{0390}'),
1724    ('\u{03AC}', '\u{03CE}'),
1725    ('\u{03D0}', '\u{03D1}'),
1726    ('\u{03D5}', '\u{03D7}'),
1727    ('\u{03D9}', '\u{03D9}'),
1728    ('\u{03DB}', '\u{03DB}'),
1729    ('\u{03DD}', '\u{03DD}'),
1730    ('\u{03DF}', '\u{03DF}'),
1731    ('\u{03E1}', '\u{03E1}'),
1732    ('\u{03E3}', '\u{03E3}'),
1733    ('\u{03E5}', '\u{03E5}'),
1734    ('\u{03E7}', '\u{03E7}'),
1735    ('\u{03E9}', '\u{03E9}'),
1736    ('\u{03EB}', '\u{03EB}'),
1737    ('\u{03ED}', '\u{03ED}'),
1738    ('\u{03EF}', '\u{03F3}'),
1739    ('\u{03F5}', '\u{03F5}'),
1740    ('\u{03F8}', '\u{03F8}'),
1741    ('\u{03FB}', '\u{03FC}'),
1742    ('\u{0430}', '\u{045F}'),
1743    ('\u{0461}', '\u{0461}'),
1744    ('\u{0463}', '\u{0463}'),
1745    ('\u{0465}', '\u{0465}'),
1746    ('\u{0467}', '\u{0467}'),
1747    ('\u{0469}', '\u{0469}'),
1748    ('\u{046B}', '\u{046B}'),
1749    ('\u{046D}', '\u{046D}'),
1750    ('\u{046F}', '\u{046F}'),
1751    ('\u{0471}', '\u{0471}'),
1752    ('\u{0473}', '\u{0473}'),
1753    ('\u{0475}', '\u{0475}'),
1754    ('\u{0477}', '\u{0477}'),
1755    ('\u{0479}', '\u{0479}'),
1756    ('\u{047B}', '\u{047B}'),
1757    ('\u{047D}', '\u{047D}'),
1758    ('\u{047F}', '\u{047F}'),
1759    ('\u{0481}', '\u{0481}'),
1760    ('\u{048B}', '\u{048B}'),
1761    ('\u{048D}', '\u{048D}'),
1762    ('\u{048F}', '\u{048F}'),
1763    ('\u{0491}', '\u{0491}'),
1764    ('\u{0493}', '\u{0493}'),
1765    ('\u{0495}', '\u{0495}'),
1766    ('\u{0497}', '\u{0497}'),
1767    ('\u{0499}', '\u{0499}'),
1768    ('\u{049B}', '\u{049B}'),
1769    ('\u{049D}', '\u{049D}'),
1770    ('\u{049F}', '\u{049F}'),
1771    ('\u{04A1}', '\u{04A1}'),
1772    ('\u{04A3}', '\u{04A3}'),
1773    ('\u{04A5}', '\u{04A5}'),
1774    ('\u{04A7}', '\u{04A7}'),
1775    ('\u{04A9}', '\u{04A9}'),
1776    ('\u{04AB}', '\u{04AB}'),
1777    ('\u{04AD}', '\u{04AD}'),
1778    ('\u{04AF}', '\u{04AF}'),
1779    ('\u{04B1}', '\u{04B1}'),
1780    ('\u{04B3}', '\u{04B3}'),
1781    ('\u{04B5}', '\u{04B5}'),
1782    ('\u{04B7}', '\u{04B7}'),
1783    ('\u{04B9}', '\u{04B9}'),
1784    ('\u{04BB}', '\u{04BB}'),
1785    ('\u{04BD}', '\u{04BD}'),
1786    ('\u{04BF}', '\u{04BF}'),
1787    ('\u{04C2}', '\u{04C2}'),
1788    ('\u{04C4}', '\u{04C4}'),
1789    ('\u{04C6}', '\u{04C6}'),
1790    ('\u{04C8}', '\u{04C8}'),
1791    ('\u{04CA}', '\u{04CA}'),
1792    ('\u{04CC}', '\u{04CC}'),
1793    ('\u{04CE}', '\u{04CF}'),
1794    ('\u{04D1}', '\u{04D1}'),
1795    ('\u{04D3}', '\u{04D3}'),
1796    ('\u{04D5}', '\u{04D5}'),
1797    ('\u{04D7}', '\u{04D7}'),
1798    ('\u{04D9}', '\u{04D9}'),
1799    ('\u{04DB}', '\u{04DB}'),
1800    ('\u{04DD}', '\u{04DD}'),
1801    ('\u{04DF}', '\u{04DF}'),
1802    ('\u{04E1}', '\u{04E1}'),
1803    ('\u{04E3}', '\u{04E3}'),
1804    ('\u{04E5}', '\u{04E5}'),
1805    ('\u{04E7}', '\u{04E7}'),
1806    ('\u{04E9}', '\u{04E9}'),
1807    ('\u{04EB}', '\u{04EB}'),
1808    ('\u{04ED}', '\u{04ED}'),
1809    ('\u{04EF}', '\u{04EF}'),
1810    ('\u{04F1}', '\u{04F1}'),
1811    ('\u{04F3}', '\u{04F3}'),
1812    ('\u{04F5}', '\u{04F5}'),
1813    ('\u{04F7}', '\u{04F7}'),
1814    ('\u{04F9}', '\u{04F9}'),
1815    ('\u{04FB}', '\u{04FB}'),
1816    ('\u{04FD}', '\u{04FD}'),
1817    ('\u{04FF}', '\u{04FF}'),
1818    ('\u{0501}', '\u{0501}'),
1819    ('\u{0503}', '\u{0503}'),
1820    ('\u{0505}', '\u{0505}'),
1821    ('\u{0507}', '\u{0507}'),
1822    ('\u{0509}', '\u{0509}'),
1823    ('\u{050B}', '\u{050B}'),
1824    ('\u{050D}', '\u{050D}'),
1825    ('\u{050F}', '\u{050F}'),
1826    ('\u{0511}', '\u{0511}'),
1827    ('\u{0513}', '\u{0513}'),
1828    ('\u{0515}', '\u{0515}'),
1829    ('\u{0517}', '\u{0517}'),
1830    ('\u{0519}', '\u{0519}'),
1831    ('\u{051B}', '\u{051B}'),
1832    ('\u{051D}', '\u{051D}'),
1833    ('\u{051F}', '\u{051F}'),
1834    ('\u{0521}', '\u{0521}'),
1835    ('\u{0523}', '\u{0523}'),
1836    ('\u{0525}', '\u{0525}'),
1837    ('\u{0527}', '\u{0527}'),
1838    ('\u{0529}', '\u{0529}'),
1839    ('\u{052B}', '\u{052B}'),
1840    ('\u{052D}', '\u{052D}'),
1841    ('\u{052F}', '\u{052F}'),
1842    ('\u{0560}', '\u{0588}'),
1843    ('\u{10D0}', '\u{10FA}'),
1844    ('\u{10FC}', '\u{10FF}'),
1845    ('\u{13F8}', '\u{13FD}'),
1846    ('\u{1C80}', '\u{1C88}'),
1847    ('\u{1D00}', '\u{1DBF}'),
1848    ('\u{1E01}', '\u{1E01}'),
1849    ('\u{1E03}', '\u{1E03}'),
1850    ('\u{1E05}', '\u{1E05}'),
1851    ('\u{1E07}', '\u{1E07}'),
1852    ('\u{1E09}', '\u{1E09}'),
1853    ('\u{1E0B}', '\u{1E0B}'),
1854    ('\u{1E0D}', '\u{1E0D}'),
1855    ('\u{1E0F}', '\u{1E0F}'),
1856    ('\u{1E11}', '\u{1E11}'),
1857    ('\u{1E13}', '\u{1E13}'),
1858    ('\u{1E15}', '\u{1E15}'),
1859    ('\u{1E17}', '\u{1E17}'),
1860    ('\u{1E19}', '\u{1E19}'),
1861    ('\u{1E1B}', '\u{1E1B}'),
1862    ('\u{1E1D}', '\u{1E1D}'),
1863    ('\u{1E1F}', '\u{1E1F}'),
1864    ('\u{1E21}', '\u{1E21}'),
1865    ('\u{1E23}', '\u{1E23}'),
1866    ('\u{1E25}', '\u{1E25}'),
1867    ('\u{1E27}', '\u{1E27}'),
1868    ('\u{1E29}', '\u{1E29}'),
1869    ('\u{1E2B}', '\u{1E2B}'),
1870    ('\u{1E2D}', '\u{1E2D}'),
1871    ('\u{1E2F}', '\u{1E2F}'),
1872    ('\u{1E31}', '\u{1E31}'),
1873    ('\u{1E33}', '\u{1E33}'),
1874    ('\u{1E35}', '\u{1E35}'),
1875    ('\u{1E37}', '\u{1E37}'),
1876    ('\u{1E39}', '\u{1E39}'),
1877    ('\u{1E3B}', '\u{1E3B}'),
1878    ('\u{1E3D}', '\u{1E3D}'),
1879    ('\u{1E3F}', '\u{1E3F}'),
1880    ('\u{1E41}', '\u{1E41}'),
1881    ('\u{1E43}', '\u{1E43}'),
1882    ('\u{1E45}', '\u{1E45}'),
1883    ('\u{1E47}', '\u{1E47}'),
1884    ('\u{1E49}', '\u{1E49}'),
1885    ('\u{1E4B}', '\u{1E4B}'),
1886    ('\u{1E4D}', '\u{1E4D}'),
1887    ('\u{1E4F}', '\u{1E4F}'),
1888    ('\u{1E51}', '\u{1E51}'),
1889    ('\u{1E53}', '\u{1E53}'),
1890    ('\u{1E55}', '\u{1E55}'),
1891    ('\u{1E57}', '\u{1E57}'),
1892    ('\u{1E59}', '\u{1E59}'),
1893    ('\u{1E5B}', '\u{1E5B}'),
1894    ('\u{1E5D}', '\u{1E5D}'),
1895    ('\u{1E5F}', '\u{1E5F}'),
1896    ('\u{1E61}', '\u{1E61}'),
1897    ('\u{1E63}', '\u{1E63}'),
1898    ('\u{1E65}', '\u{1E65}'),
1899    ('\u{1E67}', '\u{1E67}'),
1900    ('\u{1E69}', '\u{1E69}'),
1901    ('\u{1E6B}', '\u{1E6B}'),
1902    ('\u{1E6D}', '\u{1E6D}'),
1903    ('\u{1E6F}', '\u{1E6F}'),
1904    ('\u{1E71}', '\u{1E71}'),
1905    ('\u{1E73}', '\u{1E73}'),
1906    ('\u{1E75}', '\u{1E75}'),
1907    ('\u{1E77}', '\u{1E77}'),
1908    ('\u{1E79}', '\u{1E79}'),
1909    ('\u{1E7B}', '\u{1E7B}'),
1910    ('\u{1E7D}', '\u{1E7D}'),
1911    ('\u{1E7F}', '\u{1E7F}'),
1912    ('\u{1E81}', '\u{1E81}'),
1913    ('\u{1E83}', '\u{1E83}'),
1914    ('\u{1E85}', '\u{1E85}'),
1915    ('\u{1E87}', '\u{1E87}'),
1916    ('\u{1E89}', '\u{1E89}'),
1917    ('\u{1E8B}', '\u{1E8B}'),
1918    ('\u{1E8D}', '\u{1E8D}'),
1919    ('\u{1E8F}', '\u{1E8F}'),
1920    ('\u{1E91}', '\u{1E91}'),
1921    ('\u{1E93}', '\u{1E93}'),
1922    ('\u{1E95}', '\u{1E9D}'),
1923    ('\u{1E9F}', '\u{1E9F}'),
1924    ('\u{1EA1}', '\u{1EA1}'),
1925    ('\u{1EA3}', '\u{1EA3}'),
1926    ('\u{1EA5}', '\u{1EA5}'),
1927    ('\u{1EA7}', '\u{1EA7}'),
1928    ('\u{1EA9}', '\u{1EA9}'),
1929    ('\u{1EAB}', '\u{1EAB}'),
1930    ('\u{1EAD}', '\u{1EAD}'),
1931    ('\u{1EAF}', '\u{1EAF}'),
1932    ('\u{1EB1}', '\u{1EB1}'),
1933    ('\u{1EB3}', '\u{1EB3}'),
1934    ('\u{1EB5}', '\u{1EB5}'),
1935    ('\u{1EB7}', '\u{1EB7}'),
1936    ('\u{1EB9}', '\u{1EB9}'),
1937    ('\u{1EBB}', '\u{1EBB}'),
1938    ('\u{1EBD}', '\u{1EBD}'),
1939    ('\u{1EBF}', '\u{1EBF}'),
1940    ('\u{1EC1}', '\u{1EC1}'),
1941    ('\u{1EC3}', '\u{1EC3}'),
1942    ('\u{1EC5}', '\u{1EC5}'),
1943    ('\u{1EC7}', '\u{1EC7}'),
1944    ('\u{1EC9}', '\u{1EC9}'),
1945    ('\u{1ECB}', '\u{1ECB}'),
1946    ('\u{1ECD}', '\u{1ECD}'),
1947    ('\u{1ECF}', '\u{1ECF}'),
1948    ('\u{1ED1}', '\u{1ED1}'),
1949    ('\u{1ED3}', '\u{1ED3}'),
1950    ('\u{1ED5}', '\u{1ED5}'),
1951    ('\u{1ED7}', '\u{1ED7}'),
1952    ('\u{1ED9}', '\u{1ED9}'),
1953    ('\u{1EDB}', '\u{1EDB}'),
1954    ('\u{1EDD}', '\u{1EDD}'),
1955    ('\u{1EDF}', '\u{1EDF}'),
1956    ('\u{1EE1}', '\u{1EE1}'),
1957    ('\u{1EE3}', '\u{1EE3}'),
1958    ('\u{1EE5}', '\u{1EE5}'),
1959    ('\u{1EE7}', '\u{1EE7}'),
1960    ('\u{1EE9}', '\u{1EE9}'),
1961    ('\u{1EEB}', '\u{1EEB}'),
1962    ('\u{1EED}', '\u{1EED}'),
1963    ('\u{1EEF}', '\u{1EEF}'),
1964    ('\u{1EF1}', '\u{1EF1}'),
1965    ('\u{1EF3}', '\u{1EF3}'),
1966    ('\u{1EF5}', '\u{1EF5}'),
1967    ('\u{1EF7}', '\u{1EF7}'),
1968    ('\u{1EF9}', '\u{1EF9}'),
1969    ('\u{1EFB}', '\u{1EFB}'),
1970    ('\u{1EFD}', '\u{1EFD}'),
1971    ('\u{1EFF}', '\u{1F07}'),
1972    ('\u{1F10}', '\u{1F15}'),
1973    ('\u{1F20}', '\u{1F27}'),
1974    ('\u{1F30}', '\u{1F37}'),
1975    ('\u{1F40}', '\u{1F45}'),
1976    ('\u{1F50}', '\u{1F57}'),
1977    ('\u{1F60}', '\u{1F67}'),
1978    ('\u{1F70}', '\u{1F7D}'),
1979    ('\u{1F80}', '\u{1F87}'),
1980    ('\u{1F90}', '\u{1F97}'),
1981    ('\u{1FA0}', '\u{1FA7}'),
1982    ('\u{1FB0}', '\u{1FB4}'),
1983    ('\u{1FB6}', '\u{1FB7}'),
1984    ('\u{1FBE}', '\u{1FBE}'),
1985    ('\u{1FC2}', '\u{1FC4}'),
1986    ('\u{1FC6}', '\u{1FC7}'),
1987    ('\u{1FD0}', '\u{1FD3}'),
1988    ('\u{1FD6}', '\u{1FD7}'),
1989    ('\u{1FE0}', '\u{1FE7}'),
1990    ('\u{1FF2}', '\u{1FF4}'),
1991    ('\u{1FF6}', '\u{1FF7}'),
1992    ('\u{2071}', '\u{2071}'),
1993    ('\u{207F}', '\u{207F}'),
1994    ('\u{2090}', '\u{209C}'),
1995    ('\u{210A}', '\u{210A}'),
1996    ('\u{210E}', '\u{210F}'),
1997    ('\u{2113}', '\u{2113}'),
1998    ('\u{212F}', '\u{212F}'),
1999    ('\u{2134}', '\u{2134}'),
2000    ('\u{2139}', '\u{2139}'),
2001    ('\u{213C}', '\u{213D}'),
2002    ('\u{2146}', '\u{2149}'),
2003    ('\u{214E}', '\u{214E}'),
2004    ('\u{2170}', '\u{217F}'),
2005    ('\u{2184}', '\u{2184}'),
2006    ('\u{24D0}', '\u{24E9}'),
2007    ('\u{2C30}', '\u{2C5F}'),
2008    ('\u{2C61}', '\u{2C61}'),
2009    ('\u{2C65}', '\u{2C66}'),
2010    ('\u{2C68}', '\u{2C68}'),
2011    ('\u{2C6A}', '\u{2C6A}'),
2012    ('\u{2C6C}', '\u{2C6C}'),
2013    ('\u{2C71}', '\u{2C71}'),
2014    ('\u{2C73}', '\u{2C74}'),
2015    ('\u{2C76}', '\u{2C7D}'),
2016    ('\u{2C81}', '\u{2C81}'),
2017    ('\u{2C83}', '\u{2C83}'),
2018    ('\u{2C85}', '\u{2C85}'),
2019    ('\u{2C87}', '\u{2C87}'),
2020    ('\u{2C89}', '\u{2C89}'),
2021    ('\u{2C8B}', '\u{2C8B}'),
2022    ('\u{2C8D}', '\u{2C8D}'),
2023    ('\u{2C8F}', '\u{2C8F}'),
2024    ('\u{2C91}', '\u{2C91}'),
2025    ('\u{2C93}', '\u{2C93}'),
2026    ('\u{2C95}', '\u{2C95}'),
2027    ('\u{2C97}', '\u{2C97}'),
2028    ('\u{2C99}', '\u{2C99}'),
2029    ('\u{2C9B}', '\u{2C9B}'),
2030    ('\u{2C9D}', '\u{2C9D}'),
2031    ('\u{2C9F}', '\u{2C9F}'),
2032    ('\u{2CA1}', '\u{2CA1}'),
2033    ('\u{2CA3}', '\u{2CA3}'),
2034    ('\u{2CA5}', '\u{2CA5}'),
2035    ('\u{2CA7}', '\u{2CA7}'),
2036    ('\u{2CA9}', '\u{2CA9}'),
2037    ('\u{2CAB}', '\u{2CAB}'),
2038    ('\u{2CAD}', '\u{2CAD}'),
2039    ('\u{2CAF}', '\u{2CAF}'),
2040    ('\u{2CB1}', '\u{2CB1}'),
2041    ('\u{2CB3}', '\u{2CB3}'),
2042    ('\u{2CB5}', '\u{2CB5}'),
2043    ('\u{2CB7}', '\u{2CB7}'),
2044    ('\u{2CB9}', '\u{2CB9}'),
2045    ('\u{2CBB}', '\u{2CBB}'),
2046    ('\u{2CBD}', '\u{2CBD}'),
2047    ('\u{2CBF}', '\u{2CBF}'),
2048    ('\u{2CC1}', '\u{2CC1}'),
2049    ('\u{2CC3}', '\u{2CC3}'),
2050    ('\u{2CC5}', '\u{2CC5}'),
2051    ('\u{2CC7}', '\u{2CC7}'),
2052    ('\u{2CC9}', '\u{2CC9}'),
2053    ('\u{2CCB}', '\u{2CCB}'),
2054    ('\u{2CCD}', '\u{2CCD}'),
2055    ('\u{2CCF}', '\u{2CCF}'),
2056    ('\u{2CD1}', '\u{2CD1}'),
2057    ('\u{2CD3}', '\u{2CD3}'),
2058    ('\u{2CD5}', '\u{2CD5}'),
2059    ('\u{2CD7}', '\u{2CD7}'),
2060    ('\u{2CD9}', '\u{2CD9}'),
2061    ('\u{2CDB}', '\u{2CDB}'),
2062    ('\u{2CDD}', '\u{2CDD}'),
2063    ('\u{2CDF}', '\u{2CDF}'),
2064    ('\u{2CE1}', '\u{2CE1}'),
2065    ('\u{2CE3}', '\u{2CE4}'),
2066    ('\u{2CEC}', '\u{2CEC}'),
2067    ('\u{2CEE}', '\u{2CEE}'),
2068    ('\u{2CF3}', '\u{2CF3}'),
2069    ('\u{2D00}', '\u{2D25}'),
2070    ('\u{2D27}', '\u{2D27}'),
2071    ('\u{2D2D}', '\u{2D2D}'),
2072    ('\u{A641}', '\u{A641}'),
2073    ('\u{A643}', '\u{A643}'),
2074    ('\u{A645}', '\u{A645}'),
2075    ('\u{A647}', '\u{A647}'),
2076    ('\u{A649}', '\u{A649}'),
2077    ('\u{A64B}', '\u{A64B}'),
2078    ('\u{A64D}', '\u{A64D}'),
2079    ('\u{A64F}', '\u{A64F}'),
2080    ('\u{A651}', '\u{A651}'),
2081    ('\u{A653}', '\u{A653}'),
2082    ('\u{A655}', '\u{A655}'),
2083    ('\u{A657}', '\u{A657}'),
2084    ('\u{A659}', '\u{A659}'),
2085    ('\u{A65B}', '\u{A65B}'),
2086    ('\u{A65D}', '\u{A65D}'),
2087    ('\u{A65F}', '\u{A65F}'),
2088    ('\u{A661}', '\u{A661}'),
2089    ('\u{A663}', '\u{A663}'),
2090    ('\u{A665}', '\u{A665}'),
2091    ('\u{A667}', '\u{A667}'),
2092    ('\u{A669}', '\u{A669}'),
2093    ('\u{A66B}', '\u{A66B}'),
2094    ('\u{A66D}', '\u{A66D}'),
2095    ('\u{A681}', '\u{A681}'),
2096    ('\u{A683}', '\u{A683}'),
2097    ('\u{A685}', '\u{A685}'),
2098    ('\u{A687}', '\u{A687}'),
2099    ('\u{A689}', '\u{A689}'),
2100    ('\u{A68B}', '\u{A68B}'),
2101    ('\u{A68D}', '\u{A68D}'),
2102    ('\u{A68F}', '\u{A68F}'),
2103    ('\u{A691}', '\u{A691}'),
2104    ('\u{A693}', '\u{A693}'),
2105    ('\u{A695}', '\u{A695}'),
2106    ('\u{A697}', '\u{A697}'),
2107    ('\u{A699}', '\u{A699}'),
2108    ('\u{A69B}', '\u{A69D}'),
2109    ('\u{A723}', '\u{A723}'),
2110    ('\u{A725}', '\u{A725}'),
2111    ('\u{A727}', '\u{A727}'),
2112    ('\u{A729}', '\u{A729}'),
2113    ('\u{A72B}', '\u{A72B}'),
2114    ('\u{A72D}', '\u{A72D}'),
2115    ('\u{A72F}', '\u{A731}'),
2116    ('\u{A733}', '\u{A733}'),
2117    ('\u{A735}', '\u{A735}'),
2118    ('\u{A737}', '\u{A737}'),
2119    ('\u{A739}', '\u{A739}'),
2120    ('\u{A73B}', '\u{A73B}'),
2121    ('\u{A73D}', '\u{A73D}'),
2122    ('\u{A73F}', '\u{A73F}'),
2123    ('\u{A741}', '\u{A741}'),
2124    ('\u{A743}', '\u{A743}'),
2125    ('\u{A745}', '\u{A745}'),
2126    ('\u{A747}', '\u{A747}'),
2127    ('\u{A749}', '\u{A749}'),
2128    ('\u{A74B}', '\u{A74B}'),
2129    ('\u{A74D}', '\u{A74D}'),
2130    ('\u{A74F}', '\u{A74F}'),
2131    ('\u{A751}', '\u{A751}'),
2132    ('\u{A753}', '\u{A753}'),
2133    ('\u{A755}', '\u{A755}'),
2134    ('\u{A757}', '\u{A757}'),
2135    ('\u{A759}', '\u{A759}'),
2136    ('\u{A75B}', '\u{A75B}'),
2137    ('\u{A75D}', '\u{A75D}'),
2138    ('\u{A75F}', '\u{A75F}'),
2139    ('\u{A761}', '\u{A761}'),
2140    ('\u{A763}', '\u{A763}'),
2141    ('\u{A765}', '\u{A765}'),
2142    ('\u{A767}', '\u{A767}'),
2143    ('\u{A769}', '\u{A769}'),
2144    ('\u{A76B}', '\u{A76B}'),
2145    ('\u{A76D}', '\u{A76D}'),
2146    ('\u{A76F}', '\u{A778}'),
2147    ('\u{A77A}', '\u{A77A}'),
2148    ('\u{A77C}', '\u{A77C}'),
2149    ('\u{A77F}', '\u{A77F}'),
2150    ('\u{A781}', '\u{A781}'),
2151    ('\u{A783}', '\u{A783}'),
2152    ('\u{A785}', '\u{A785}'),
2153    ('\u{A787}', '\u{A787}'),
2154    ('\u{A78C}', '\u{A78C}'),
2155    ('\u{A78E}', '\u{A78E}'),
2156    ('\u{A791}', '\u{A791}'),
2157    ('\u{A793}', '\u{A795}'),
2158    ('\u{A797}', '\u{A797}'),
2159    ('\u{A799}', '\u{A799}'),
2160    ('\u{A79B}', '\u{A79B}'),
2161    ('\u{A79D}', '\u{A79D}'),
2162    ('\u{A79F}', '\u{A79F}'),
2163    ('\u{A7A1}', '\u{A7A1}'),
2164    ('\u{A7A3}', '\u{A7A3}'),
2165    ('\u{A7A5}', '\u{A7A5}'),
2166    ('\u{A7A7}', '\u{A7A7}'),
2167    ('\u{A7A9}', '\u{A7A9}'),
2168    ('\u{A7AF}', '\u{A7AF}'),
2169    ('\u{A7B5}', '\u{A7B5}'),
2170    ('\u{A7B7}', '\u{A7B7}'),
2171    ('\u{A7B9}', '\u{A7B9}'),
2172    ('\u{A7BB}', '\u{A7BB}'),
2173    ('\u{A7BD}', '\u{A7BD}'),
2174    ('\u{A7BF}', '\u{A7BF}'),
2175    ('\u{A7C1}', '\u{A7C1}'),
2176    ('\u{A7C3}', '\u{A7C3}'),
2177    ('\u{A7C8}', '\u{A7C8}'),
2178    ('\u{A7CA}', '\u{A7CA}'),
2179    ('\u{A7D1}', '\u{A7D1}'),
2180    ('\u{A7D3}', '\u{A7D3}'),
2181    ('\u{A7D5}', '\u{A7D5}'),
2182    ('\u{A7D7}', '\u{A7D7}'),
2183    ('\u{A7D9}', '\u{A7D9}'),
2184    ('\u{A7F2}', '\u{A7F4}'),
2185    ('\u{A7F6}', '\u{A7F6}'),
2186    ('\u{A7F8}', '\u{A7FA}'),
2187    ('\u{AB30}', '\u{AB5A}'),
2188    ('\u{AB5C}', '\u{AB69}'),
2189    ('\u{AB70}', '\u{ABBF}'),
2190    ('\u{FB00}', '\u{FB06}'),
2191    ('\u{FB13}', '\u{FB17}'),
2192    ('\u{FF41}', '\u{FF5A}'),
2193    ('\u{10428}', '\u{1044F}'),
2194    ('\u{104D8}', '\u{104FB}'),
2195    ('\u{10597}', '\u{105A1}'),
2196    ('\u{105A3}', '\u{105B1}'),
2197    ('\u{105B3}', '\u{105B9}'),
2198    ('\u{105BB}', '\u{105BC}'),
2199    ('\u{10780}', '\u{10780}'),
2200    ('\u{10783}', '\u{10785}'),
2201    ('\u{10787}', '\u{107B0}'),
2202    ('\u{107B2}', '\u{107BA}'),
2203    ('\u{10CC0}', '\u{10CF2}'),
2204    ('\u{118C0}', '\u{118DF}'),
2205    ('\u{16E60}', '\u{16E7F}'),
2206    ('\u{1D41A}', '\u{1D433}'),
2207    ('\u{1D44E}', '\u{1D454}'),
2208    ('\u{1D456}', '\u{1D467}'),
2209    ('\u{1D482}', '\u{1D49B}'),
2210    ('\u{1D4B6}', '\u{1D4B9}'),
2211    ('\u{1D4BB}', '\u{1D4BB}'),
2212    ('\u{1D4BD}', '\u{1D4C3}'),
2213    ('\u{1D4C5}', '\u{1D4CF}'),
2214    ('\u{1D4EA}', '\u{1D503}'),
2215    ('\u{1D51E}', '\u{1D537}'),
2216    ('\u{1D552}', '\u{1D56B}'),
2217    ('\u{1D586}', '\u{1D59F}'),
2218    ('\u{1D5BA}', '\u{1D5D3}'),
2219    ('\u{1D5EE}', '\u{1D607}'),
2220    ('\u{1D622}', '\u{1D63B}'),
2221    ('\u{1D656}', '\u{1D66F}'),
2222    ('\u{1D68A}', '\u{1D6A5}'),
2223    ('\u{1D6C2}', '\u{1D6DA}'),
2224    ('\u{1D6DC}', '\u{1D6E1}'),
2225    ('\u{1D6FC}', '\u{1D714}'),
2226    ('\u{1D716}', '\u{1D71B}'),
2227    ('\u{1D736}', '\u{1D74E}'),
2228    ('\u{1D750}', '\u{1D755}'),
2229    ('\u{1D770}', '\u{1D788}'),
2230    ('\u{1D78A}', '\u{1D78F}'),
2231    ('\u{1D7AA}', '\u{1D7C2}'),
2232    ('\u{1D7C4}', '\u{1D7C9}'),
2233    ('\u{1D7CB}', '\u{1D7CB}'),
2234    ('\u{1DF00}', '\u{1DF09}'),
2235    ('\u{1DF0B}', '\u{1DF1E}'),
2236    ('\u{1DF25}', '\u{1DF2A}'),
2237    ('\u{1E030}', '\u{1E06D}'),
2238    ('\u{1E922}', '\u{1E943}'),
2239];
2240
/// Inclusive codepoint ranges of characters in Unicode general category
/// `Lt` (titlecase letters such as `Dž`, `Lj`, `Nj`, `Dz`). Mirrors
/// CPython's `Py_UNICODE_ISTITLE`. These count as neither upper nor
/// lower but ARE cased, and open a title-word.
///
/// Queried through [`is_title_char`], which binary-searches the table
/// via [`in_ranges`]; the entries must therefore stay sorted by
/// codepoint and non-overlapping.
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0.
const TITLE_RANGES: &[(char, char)] = &[
    ('\u{01C5}', '\u{01C5}'),
    ('\u{01C8}', '\u{01C8}'),
    ('\u{01CB}', '\u{01CB}'),
    ('\u{01F2}', '\u{01F2}'),
    ('\u{1F88}', '\u{1F8F}'),
    ('\u{1F98}', '\u{1F9F}'),
    ('\u{1FA8}', '\u{1FAF}'),
    ('\u{1FBC}', '\u{1FBC}'),
    ('\u{1FCC}', '\u{1FCC}'),
    ('\u{1FFC}', '\u{1FFC}'),
];
2259
/// Inclusive codepoint ranges of characters for which CPython's
/// `str.isspace()` returns `True` (Unicode `White_Space` plus the bidi
/// `WS`/`B`/`S` C0 controls `U+001C`..`U+001F`). BROADER than Rust's
/// `char::is_whitespace` (the `White_Space` property only), which omits
/// the file/group/record/unit separators.
///
/// Queried through [`is_space_char`], which binary-searches the table
/// via [`in_ranges`]; the entries must therefore stay sorted by
/// codepoint and non-overlapping.
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0.
const WHITESPACE_RANGES: &[(char, char)] = &[
    // TAB, LF, VT, FF, CR
    ('\u{0009}', '\u{000D}'),
    // FS, GS, RS, US (the separators `char::is_whitespace` omits), then SPACE
    ('\u{001C}', '\u{0020}'),
    // NEXT LINE (NEL)
    ('\u{0085}', '\u{0085}'),
    // NO-BREAK SPACE
    ('\u{00A0}', '\u{00A0}'),
    // OGHAM SPACE MARK
    ('\u{1680}', '\u{1680}'),
    // EN QUAD .. HAIR SPACE
    ('\u{2000}', '\u{200A}'),
    // LINE SEPARATOR, PARAGRAPH SEPARATOR
    ('\u{2028}', '\u{2029}'),
    // NARROW NO-BREAK SPACE
    ('\u{202F}', '\u{202F}'),
    // MEDIUM MATHEMATICAL SPACE
    ('\u{205F}', '\u{205F}'),
    // IDEOGRAPHIC SPACE
    ('\u{3000}', '\u{3000}'),
];
2279
/// Inclusive codepoint ranges of characters with Unicode `Numeric_Type`
/// of Decimal, Digit, **or** Numeric — i.e. exactly the set for which
/// CPython's `str.isnumeric()` returns `True` for a single character
/// (`Py_UNICODE_ISNUMERIC`). This is the BROADEST of the numeric
/// predicates: it is a superset of [`DIGIT_RANGES`] (which is itself a
/// superset of [`DECIMAL_RANGES`]), additionally accepting fractions
/// (`½`), letter-numerals such as roman `Ⅻ` (category `Nl`), and
/// ideographic numerals such as `一` (U+4E00, category `Lo`).
///
/// Queried through [`is_numeric_char`], which binary-searches the table
/// via [`in_ranges`]; the entries must therefore stay sorted by
/// codepoint and non-overlapping.
///
/// Derived live from the oracle CPython 3.13 / Unicode 15.1.0 (the
/// version backing the installed `numpy` 2.4 build that this crate is
/// verified against):
/// `[c for c in range(0x110000) if chr(c).isnumeric()]`, run-length
/// compressed to 219 ranges. Rust's `char::is_numeric` tests the
/// general categories `Nd|Nl|No` instead of the `Numeric_Type`
/// property, so it MISSES ideographic numerals (category `Lo`, e.g.
/// `一`) and INCLUDES codepoints assigned after Unicode 15.1.0; the
/// table is therefore embedded directly rather than computed from
/// `std` or a Unicode crate.
const NUMERIC_TYPE_RANGES: &[(char, char)] = &[
    ('\u{0030}', '\u{0039}'),
    ('\u{00B2}', '\u{00B3}'),
    ('\u{00B9}', '\u{00B9}'),
    ('\u{00BC}', '\u{00BE}'),
    ('\u{0660}', '\u{0669}'),
    ('\u{06F0}', '\u{06F9}'),
    ('\u{07C0}', '\u{07C9}'),
    ('\u{0966}', '\u{096F}'),
    ('\u{09E6}', '\u{09EF}'),
    ('\u{09F4}', '\u{09F9}'),
    ('\u{0A66}', '\u{0A6F}'),
    ('\u{0AE6}', '\u{0AEF}'),
    ('\u{0B66}', '\u{0B6F}'),
    ('\u{0B72}', '\u{0B77}'),
    ('\u{0BE6}', '\u{0BF2}'),
    ('\u{0C66}', '\u{0C6F}'),
    ('\u{0C78}', '\u{0C7E}'),
    ('\u{0CE6}', '\u{0CEF}'),
    ('\u{0D58}', '\u{0D5E}'),
    ('\u{0D66}', '\u{0D78}'),
    ('\u{0DE6}', '\u{0DEF}'),
    ('\u{0E50}', '\u{0E59}'),
    ('\u{0ED0}', '\u{0ED9}'),
    ('\u{0F20}', '\u{0F33}'),
    ('\u{1040}', '\u{1049}'),
    ('\u{1090}', '\u{1099}'),
    ('\u{1369}', '\u{137C}'),
    ('\u{16EE}', '\u{16F0}'),
    ('\u{17E0}', '\u{17E9}'),
    ('\u{17F0}', '\u{17F9}'),
    ('\u{1810}', '\u{1819}'),
    ('\u{1946}', '\u{194F}'),
    ('\u{19D0}', '\u{19DA}'),
    ('\u{1A80}', '\u{1A89}'),
    ('\u{1A90}', '\u{1A99}'),
    ('\u{1B50}', '\u{1B59}'),
    ('\u{1BB0}', '\u{1BB9}'),
    ('\u{1C40}', '\u{1C49}'),
    ('\u{1C50}', '\u{1C59}'),
    ('\u{2070}', '\u{2070}'),
    ('\u{2074}', '\u{2079}'),
    ('\u{2080}', '\u{2089}'),
    ('\u{2150}', '\u{2182}'),
    ('\u{2185}', '\u{2189}'),
    ('\u{2460}', '\u{249B}'),
    ('\u{24EA}', '\u{24FF}'),
    ('\u{2776}', '\u{2793}'),
    ('\u{2CFD}', '\u{2CFD}'),
    ('\u{3007}', '\u{3007}'),
    ('\u{3021}', '\u{3029}'),
    ('\u{3038}', '\u{303A}'),
    ('\u{3192}', '\u{3195}'),
    ('\u{3220}', '\u{3229}'),
    ('\u{3248}', '\u{324F}'),
    ('\u{3251}', '\u{325F}'),
    ('\u{3280}', '\u{3289}'),
    ('\u{32B1}', '\u{32BF}'),
    ('\u{3405}', '\u{3405}'),
    ('\u{3483}', '\u{3483}'),
    ('\u{382A}', '\u{382A}'),
    ('\u{3B4D}', '\u{3B4D}'),
    ('\u{4E00}', '\u{4E00}'),
    ('\u{4E03}', '\u{4E03}'),
    ('\u{4E07}', '\u{4E07}'),
    ('\u{4E09}', '\u{4E09}'),
    ('\u{4E24}', '\u{4E24}'),
    ('\u{4E5D}', '\u{4E5D}'),
    ('\u{4E8C}', '\u{4E8C}'),
    ('\u{4E94}', '\u{4E94}'),
    ('\u{4E96}', '\u{4E96}'),
    ('\u{4EAC}', '\u{4EAC}'),
    ('\u{4EBF}', '\u{4EC0}'),
    ('\u{4EDF}', '\u{4EDF}'),
    ('\u{4EE8}', '\u{4EE8}'),
    ('\u{4F0D}', '\u{4F0D}'),
    ('\u{4F70}', '\u{4F70}'),
    ('\u{4FE9}', '\u{4FE9}'),
    ('\u{5006}', '\u{5006}'),
    ('\u{5104}', '\u{5104}'),
    ('\u{5146}', '\u{5146}'),
    ('\u{5169}', '\u{5169}'),
    ('\u{516B}', '\u{516B}'),
    ('\u{516D}', '\u{516D}'),
    ('\u{5341}', '\u{5341}'),
    ('\u{5343}', '\u{5345}'),
    ('\u{534C}', '\u{534C}'),
    ('\u{53C1}', '\u{53C4}'),
    ('\u{56DB}', '\u{56DB}'),
    ('\u{58F1}', '\u{58F1}'),
    ('\u{58F9}', '\u{58F9}'),
    ('\u{5E7A}', '\u{5E7A}'),
    ('\u{5EFE}', '\u{5EFF}'),
    ('\u{5F0C}', '\u{5F0E}'),
    ('\u{5F10}', '\u{5F10}'),
    ('\u{62D0}', '\u{62D0}'),
    ('\u{62FE}', '\u{62FE}'),
    ('\u{634C}', '\u{634C}'),
    ('\u{67D2}', '\u{67D2}'),
    ('\u{6D1E}', '\u{6D1E}'),
    ('\u{6F06}', '\u{6F06}'),
    ('\u{7396}', '\u{7396}'),
    ('\u{767E}', '\u{767E}'),
    ('\u{7695}', '\u{7695}'),
    ('\u{79ED}', '\u{79ED}'),
    ('\u{8086}', '\u{8086}'),
    ('\u{842C}', '\u{842C}'),
    ('\u{8CAE}', '\u{8CAE}'),
    ('\u{8CB3}', '\u{8CB3}'),
    ('\u{8D30}', '\u{8D30}'),
    ('\u{920E}', '\u{920E}'),
    ('\u{94A9}', '\u{94A9}'),
    ('\u{9621}', '\u{9621}'),
    ('\u{9646}', '\u{9646}'),
    ('\u{964C}', '\u{964C}'),
    ('\u{9678}', '\u{9678}'),
    ('\u{96F6}', '\u{96F6}'),
    ('\u{A620}', '\u{A629}'),
    ('\u{A6E6}', '\u{A6EF}'),
    ('\u{A830}', '\u{A835}'),
    ('\u{A8D0}', '\u{A8D9}'),
    ('\u{A900}', '\u{A909}'),
    ('\u{A9D0}', '\u{A9D9}'),
    ('\u{A9F0}', '\u{A9F9}'),
    ('\u{AA50}', '\u{AA59}'),
    ('\u{ABF0}', '\u{ABF9}'),
    ('\u{F96B}', '\u{F96B}'),
    ('\u{F973}', '\u{F973}'),
    ('\u{F978}', '\u{F978}'),
    ('\u{F9B2}', '\u{F9B2}'),
    ('\u{F9D1}', '\u{F9D1}'),
    ('\u{F9D3}', '\u{F9D3}'),
    ('\u{F9FD}', '\u{F9FD}'),
    ('\u{FF10}', '\u{FF19}'),
    ('\u{10107}', '\u{10133}'),
    ('\u{10140}', '\u{10178}'),
    ('\u{1018A}', '\u{1018B}'),
    ('\u{102E1}', '\u{102FB}'),
    ('\u{10320}', '\u{10323}'),
    ('\u{10341}', '\u{10341}'),
    ('\u{1034A}', '\u{1034A}'),
    ('\u{103D1}', '\u{103D5}'),
    ('\u{104A0}', '\u{104A9}'),
    ('\u{10858}', '\u{1085F}'),
    ('\u{10879}', '\u{1087F}'),
    ('\u{108A7}', '\u{108AF}'),
    ('\u{108FB}', '\u{108FF}'),
    ('\u{10916}', '\u{1091B}'),
    ('\u{109BC}', '\u{109BD}'),
    ('\u{109C0}', '\u{109CF}'),
    ('\u{109D2}', '\u{109FF}'),
    ('\u{10A40}', '\u{10A48}'),
    ('\u{10A7D}', '\u{10A7E}'),
    ('\u{10A9D}', '\u{10A9F}'),
    ('\u{10AEB}', '\u{10AEF}'),
    ('\u{10B58}', '\u{10B5F}'),
    ('\u{10B78}', '\u{10B7F}'),
    ('\u{10BA9}', '\u{10BAF}'),
    ('\u{10CFA}', '\u{10CFF}'),
    ('\u{10D30}', '\u{10D39}'),
    ('\u{10E60}', '\u{10E7E}'),
    ('\u{10F1D}', '\u{10F26}'),
    ('\u{10F51}', '\u{10F54}'),
    ('\u{10FC5}', '\u{10FCB}'),
    ('\u{11052}', '\u{1106F}'),
    ('\u{110F0}', '\u{110F9}'),
    ('\u{11136}', '\u{1113F}'),
    ('\u{111D0}', '\u{111D9}'),
    ('\u{111E1}', '\u{111F4}'),
    ('\u{112F0}', '\u{112F9}'),
    ('\u{11450}', '\u{11459}'),
    ('\u{114D0}', '\u{114D9}'),
    ('\u{11650}', '\u{11659}'),
    ('\u{116C0}', '\u{116C9}'),
    ('\u{11730}', '\u{1173B}'),
    ('\u{118E0}', '\u{118F2}'),
    ('\u{11950}', '\u{11959}'),
    ('\u{11C50}', '\u{11C6C}'),
    ('\u{11D50}', '\u{11D59}'),
    ('\u{11DA0}', '\u{11DA9}'),
    ('\u{11F50}', '\u{11F59}'),
    ('\u{11FC0}', '\u{11FD4}'),
    ('\u{12400}', '\u{1246E}'),
    ('\u{16A60}', '\u{16A69}'),
    ('\u{16AC0}', '\u{16AC9}'),
    ('\u{16B50}', '\u{16B59}'),
    ('\u{16B5B}', '\u{16B61}'),
    ('\u{16E80}', '\u{16E96}'),
    ('\u{1D2C0}', '\u{1D2D3}'),
    ('\u{1D2E0}', '\u{1D2F3}'),
    ('\u{1D360}', '\u{1D378}'),
    ('\u{1D7CE}', '\u{1D7FF}'),
    ('\u{1E140}', '\u{1E149}'),
    ('\u{1E2F0}', '\u{1E2F9}'),
    ('\u{1E4F0}', '\u{1E4F9}'),
    ('\u{1E8C7}', '\u{1E8CF}'),
    ('\u{1E950}', '\u{1E959}'),
    ('\u{1EC71}', '\u{1ECAB}'),
    ('\u{1ECAD}', '\u{1ECAF}'),
    ('\u{1ECB1}', '\u{1ECB4}'),
    ('\u{1ED01}', '\u{1ED2D}'),
    ('\u{1ED2F}', '\u{1ED3D}'),
    ('\u{1F100}', '\u{1F10C}'),
    ('\u{1FBF0}', '\u{1FBF9}'),
    ('\u{20001}', '\u{20001}'),
    ('\u{20064}', '\u{20064}'),
    ('\u{200E2}', '\u{200E2}'),
    ('\u{20121}', '\u{20121}'),
    ('\u{2092A}', '\u{2092A}'),
    ('\u{20983}', '\u{20983}'),
    ('\u{2098C}', '\u{2098C}'),
    ('\u{2099C}', '\u{2099C}'),
    ('\u{20AEA}', '\u{20AEA}'),
    ('\u{20AFD}', '\u{20AFD}'),
    ('\u{20B19}', '\u{20B19}'),
    ('\u{22390}', '\u{22390}'),
    ('\u{22998}', '\u{22998}'),
    ('\u{23B1B}', '\u{23B1B}'),
    ('\u{2626D}', '\u{2626D}'),
    ('\u{2F890}', '\u{2F890}'),
];
2520
2521/// `true` if `c` has Unicode `Numeric_Type` Decimal or Digit
2522/// (`Py_UNICODE_ISDIGIT`).
2523fn is_digit_char(c: char) -> bool {
2524    in_ranges(DIGIT_RANGES, c)
2525}
2526
2527/// `true` if `c` has Unicode `Numeric_Type` Decimal (`Py_UNICODE_ISDECIMAL`).
2528fn is_decimal_char(c: char) -> bool {
2529    in_ranges(DECIMAL_RANGES, c)
2530}
2531
2532/// `true` if `c` has Unicode `Numeric_Type` Decimal, Digit, or Numeric
2533/// (`Py_UNICODE_ISNUMERIC`). Backed by the 15.1.0-locked
2534/// [`NUMERIC_TYPE_RANGES`] table rather than Rust's `char::is_numeric`,
2535/// which tracks the `Nd|Nl|No` general categories against a newer
2536/// Unicode revision and so both misses ideographic numerals (`Lo`) and
2537/// includes post-15.1.0 codepoints.
2538fn is_numeric_char(c: char) -> bool {
2539    in_ranges(NUMERIC_TYPE_RANGES, c)
2540}
2541
2542/// `true` if `c` is in Unicode general category `L*` (`str.isalpha`).
2543fn is_alpha_char(c: char) -> bool {
2544    in_ranges(ALPHA_RANGES, c)
2545}
2546
2547/// `true` if `c` has the Unicode `Uppercase` property (`Py_UNICODE_ISUPPER`).
2548fn is_upper_char(c: char) -> bool {
2549    in_ranges(UPPER_RANGES, c)
2550}
2551
2552/// `true` if `c` has the Unicode `Lowercase` property (`Py_UNICODE_ISLOWER`).
2553fn is_lower_char(c: char) -> bool {
2554    in_ranges(LOWER_RANGES, c)
2555}
2556
2557/// `true` if `c` is a titlecase letter, category `Lt` (`Py_UNICODE_ISTITLE`).
2558fn is_title_char(c: char) -> bool {
2559    in_ranges(TITLE_RANGES, c)
2560}
2561
2562/// `true` if `c` is whitespace per `str.isspace` (`Py_UNICODE_ISSPACE`).
2563fn is_space_char(c: char) -> bool {
2564    in_ranges(WHITESPACE_RANGES, c)
2565}
2566
2567/// Classify each element by applying `pred` to the string content.
2568fn classify<D: Dimension>(
2569    a: &StringArray<D>,
2570    pred: impl Fn(&str) -> bool,
2571) -> FerrayResult<Array<bool, D>> {
2572    let data: Vec<bool> = a.iter().map(|s| pred(s)).collect();
2573    Array::from_vec(a.dim().clone(), data)
2574}
2575
2576/// Return `true` where every character is alphabetic (Unicode general
2577/// category `L*`) and the string is non-empty. Matches
2578/// `numpy.strings.isalpha`, which delegates per element to CPython's
2579/// `str.isalpha()`. NARROWER than Rust's `char::is_alphabetic`: roman
2580/// numerals like `Ⅻ` (category `Nl`) and combining marks return `false`.
2581/// See [`is_alpha_char`]/[`ALPHA_RANGES`].
2582pub fn isalpha<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2583    classify(a, |s| !s.is_empty() && s.chars().all(is_alpha_char))
2584}
2585
2586/// Return `true` where every character is a digit and the string is
2587/// non-empty. Matches `numpy.strings.isdigit`, which delegates per
2588/// element to CPython's `str.isdigit()` (`Py_UNICODE_ISDIGIT`):
2589/// Unicode `Numeric_Type` of Decimal **or** Digit. This is broader than
2590/// [`isdecimal`] (it accepts superscripts/subscripts like `²³`, circled
2591/// digits like `①`, and parenthesized digits) and narrower than
2592/// [`isnumeric`] (it rejects fractions like `½` and letter-numerals like
2593/// `Ⅻ`). See [`is_digit_char`]/[`DIGIT_RANGES`].
2594pub fn isdigit<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2595    classify(a, |s| !s.is_empty() && s.chars().all(is_digit_char))
2596}
2597
2598/// Return `true` where every character is whitespace and the string is
2599/// non-empty. Matches `numpy.strings.isspace`, which delegates per
2600/// element to CPython's `str.isspace()` (`Py_UNICODE_ISSPACE`): the
2601/// Unicode `White_Space` property plus the bidi separators
2602/// `U+001C`..`U+001F`. See [`is_space_char`]/[`WHITESPACE_RANGES`].
2603pub fn isspace<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2604    classify(a, |s| !s.is_empty() && s.chars().all(is_space_char))
2605}
2606
2607/// Return `true` where the string is uppercased and the string is
2608/// non-empty. Matches `numpy.strings.isupper` / CPython `str.isupper()`
2609/// (`do_isupper`): there is at least one cased character and no cased
2610/// character is lowercase or titlecase. Uncased characters (digits,
2611/// punctuation, roman numerals like `Ⅻ` which has the `Uppercase`
2612/// property) do not disqualify the string.
2613pub fn isupper<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2614    classify(a, |s| {
2615        let mut cased = false;
2616        for c in s.chars() {
2617            if is_lower_char(c) || is_title_char(c) {
2618                return false;
2619            }
2620            if is_upper_char(c) {
2621                cased = true;
2622            }
2623        }
2624        cased
2625    })
2626}
2627
2628/// Return `true` where the string is lowercased and the string is
2629/// non-empty. Matches `numpy.strings.islower` / CPython `str.islower()`
2630/// (`do_islower`): at least one cased character and no cased character is
2631/// uppercase or titlecase.
2632pub fn islower<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2633    classify(a, |s| {
2634        let mut cased = false;
2635        for c in s.chars() {
2636            if is_upper_char(c) || is_title_char(c) {
2637                return false;
2638            }
2639            if is_lower_char(c) {
2640                cased = true;
2641            }
2642        }
2643        cased
2644    })
2645}
2646
2647/// Return `true` where every character is alphanumeric and the string is
2648/// non-empty. Matches `numpy.strings.isalnum` / CPython `str.isalnum()`:
2649/// every character is alphabetic (`isalpha`) OR a decimal/digit/numeric
2650/// character (`isdecimal`/`isdigit`/`isnumeric`). Composes the corrected
2651/// predicates: `½` (numeric) and `②` (digit) are alphanumeric.
2652pub fn isalnum<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2653    classify(a, |s| {
2654        !s.is_empty()
2655            && s.chars().all(|c| {
2656                is_alpha_char(c) || is_decimal_char(c) || is_digit_char(c) || is_numeric_char(c)
2657            })
2658    })
2659}
2660
2661/// Return `true` where every character is numeric and the string is
2662/// non-empty. Matches `numpy.strings.isnumeric`, which delegates per
2663/// element to CPython's `str.isnumeric()` (Unicode `Numeric_Type` =
2664/// Decimal, Digit, or Numeric): superscripts/fractions like `²` and `½`,
2665/// roman numerals like `Ⅻ`, and ideographic numerals like `一` are
2666/// numeric, while `.`, `+`, and `-` are not. Backed by the 15.1.0-locked
2667/// [`NUMERIC_TYPE_RANGES`] table via [`is_numeric_char`], NOT Rust's
2668/// `char::is_numeric` (which tests the `Nd|Nl|No` categories against a
2669/// newer Unicode revision: it misses the `Lo` ideographic numerals and
2670/// wrongly accepts codepoints assigned after Unicode 15.1.0).
2671pub fn isnumeric<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2672    classify(a, |s| !s.is_empty() && s.chars().all(is_numeric_char))
2673}
2674
2675/// Return `true` where every character is a Unicode decimal digit
2676/// (`Numeric_Type` = Decimal) and the string is non-empty. Matches
2677/// `numpy.strings.isdecimal` / CPython `str.isdecimal()`
2678/// (`Py_UNICODE_ISDECIMAL`). Stricter than [`isdigit`]: superscripts
2679/// (`²`), circled digits (`①`) and fractions (`½`) return `false`, while
2680/// non-ASCII positional decimals such as fullwidth `0` and Arabic-Indic
2681/// `٣` return `true`. See [`is_decimal_char`]/[`DECIMAL_RANGES`].
2682pub fn isdecimal<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2683    classify(a, |s| !s.is_empty() && s.chars().all(is_decimal_char))
2684}
2685
2686/// Return `true` where the string is titlecased and non-empty. Matches
2687/// `numpy.strings.istitle` / CPython `str.istitle()` (`do_istitle`):
2688/// uppercase/titlecase characters may only follow an uncased character
2689/// (they open a word), lowercase characters may only follow a cased
2690/// character, and at least one cased character is present. Titlecase
2691/// (`Lt`) characters such as `Dž` count as an opening (uppercase-position)
2692/// character, so `"Dž"` and `"Džx"` are titlecased.
2693pub fn istitle<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
2694    classify(a, |s| {
2695        let mut cased = false;
2696        let mut previous_is_cased = false;
2697        for c in s.chars() {
2698            if is_upper_char(c) || is_title_char(c) {
2699                if previous_is_cased {
2700                    return false;
2701                }
2702                previous_is_cased = true;
2703                cased = true;
2704            } else if is_lower_char(c) {
2705                if !previous_is_cased {
2706                    return false;
2707                }
2708                cased = true;
2709            } else {
2710                previous_is_cased = false;
2711            }
2712        }
2713        cased
2714    })
2715}
2716
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    // --- Basic ASCII smoke tests, one per predicate. Every predicate must
    // return `false` for the empty string. ---

    #[test]
    fn test_isalpha() {
        let a = array(&["hello", "hello123", "", "HELLO"]).unwrap();
        let r = isalpha(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    #[test]
    fn test_isdigit() {
        let a = array(&["123", "12.3", "", "abc"]).unwrap();
        let r = isdigit(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_isspace() {
        let a = array(&["  ", "\t\n", "", "a b"]).unwrap();
        let r = isspace(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, true, false, false]);
    }

    #[test]
    fn test_isupper() {
        let a = array(&["HELLO", "Hello", "hello", ""]).unwrap();
        let r = isupper(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_islower() {
        let a = array(&["hello", "Hello", "HELLO", ""]).unwrap();
        let r = islower(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_isalnum() {
        let a = array(&["abc123", "abc 123", "", "abc"]).unwrap();
        let r = isalnum(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    #[test]
    fn test_istitle() {
        let a = array(&["Hello World", "hello world", "HELLO WORLD", ""]).unwrap();
        let r = istitle(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    // --- Divergence pins for crosslink #823/#911 (isnumeric/isdecimal/
    // isalnum/isdigit). Expected values are the live results of CPython
    // 3.13 / numpy 2.4 (Unicode 15.1.0) per-character `str.is*()`. ---

    #[test]
    fn test_isnumeric_python_semantics() {
        let a = array(&["12.3", "²", "½", "123", ""]).unwrap();
        let r = isnumeric(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[false, true, true, true, false]);
    }

    #[test]
    fn test_isdigit_python_semantics() {
        let a = array(&[
            "²³", "⁴", "₀", "①", "⓪", "⑴", "09", "٣", "½", "Ⅻ", "123", "12a", "",
        ])
        .unwrap();
        let r = isdigit(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[
                true, true, true, true, true, true, true, true, false, false, true, false, false,
            ]
        );
    }

    #[test]
    fn test_isdigit_does_not_regress_isdecimal_isnumeric() {
        let a = array(&["²", "½", "①", "Ⅻ", "0123456789"]).unwrap();
        assert_eq!(
            isdecimal(&a).unwrap().as_slice().unwrap(),
            &[false, false, false, false, true]
        );
        assert_eq!(
            isnumeric(&a).unwrap().as_slice().unwrap(),
            &[true, true, true, true, true]
        );
        assert_eq!(
            isdigit(&a).unwrap().as_slice().unwrap(),
            &[true, false, true, false, true]
        );
    }

    // --- Divergence pins for crosslink #912 (refs #836). Expected values
    // are the live results of CPython 3.13 / numpy.char (Unicode 15.1.0),
    // confirmed against `numpy.char.is*` element by element. ---

    // isdecimal: `Numeric_Type=Decimal`. Non-ASCII positional decimals
    // (fullwidth digit zero U+FF10, Arabic-Indic '٣') are True; superscript
    // '²' and fraction '½' are False; ASCII '123' True. The PRE-FIX impl
    // used `is_ascii_digit()` and returned False for U+FF10/'٣' (the
    // divergence). The fullwidth digit is written as an escape so it cannot
    // be silently folded to ASCII '0', which would defeat the pin.
    #[test]
    fn test_isdecimal_python_semantics_912() {
        let a = array(&["\u{FF10}", "٣", "²", "½", "①", "123", "12.3", ""]).unwrap();
        let r = isdecimal(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, false, false, false, true, false, false]
        );
    }

    // isalpha: Unicode general category L* ONLY. Roman numeral 'Ⅻ' (Nl)
    // and circled 'Ⓐ' (So) are NOT alpha; accented 'café', CJK '日本' and
    // titlecase digraph 'Dž' (Lt) ARE. The PRE-FIX impl used
    // `char::is_alphabetic` (the Alphabetic property) and returned True
    // for 'Ⅻ' (the divergence).
    #[test]
    fn test_isalpha_python_semantics_912() {
        let a = array(&["café", "日本", "Dž", "Ⅻ", "Ⓐ", "abc123", "123", ""]).unwrap();
        let r = isalpha(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, true, false, false, false, false, false]
        );
    }

    // istitle: titlecase Lt char counts as an opening (uppercase-position)
    // character. 'Dž' and 'Džx' are titlecased; 'DžX' is not (two cased in a
    // row). 'Ⅻ' (Nl, Uppercase property) opens a title word -> True. The
    // PRE-FIX impl used `char::is_uppercase` and missed Lt (returned False
    // for 'Dž', the divergence).
    #[test]
    fn test_istitle_python_semantics_912() {
        let a = array(&["Hello", "Dž", "Džx", "DžX", "Ⅻ", "hELLO", "Hello World", ""]).unwrap();
        let r = istitle(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, true, false, true, false, true, false]
        );
    }

    // isupper/islower: Unicode Uppercase/Lowercase properties (incl.
    // Other_Uppercase/Lowercase). Roman 'Ⅻ' (Nl, Uppercase prop) is upper;
    // 'ⅻ' (lowercase prop) is lower; titlecase 'Dž' is neither. Digraph
    // 'dž' (Ll) is lower.
    #[test]
    fn test_isupper_islower_python_semantics_912() {
        let up = array(&["ABC", "Ⅻ", "DŽ", "aBc", "Dž", "A1", "123", ""]).unwrap();
        assert_eq!(
            isupper(&up).unwrap().as_slice().unwrap(),
            &[true, true, true, false, false, true, false, false]
        );
        let lo = array(&["abc", "ⅻ", "dž", "Hello", "Dž", "a1", "café", ""]).unwrap();
        assert_eq!(
            islower(&lo).unwrap().as_slice().unwrap(),
            &[true, true, true, false, false, true, true, false]
        );
    }

    // isalnum composes the corrected predicates: alpha OR decimal OR digit
    // OR numeric. '½' (numeric) and '②' (digit) are alnum; whitespace is
    // not. 'Ⅻ' is numeric -> alnum.
    #[test]
    fn test_isalnum_python_semantics_912() {
        let a = array(&["a1", "②", "½", "Ⅻ", "café", "ab c", "abc", ""]).unwrap();
        let r = isalnum(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, true, true, true, false, true, false]
        );
    }

    // isspace: Unicode White_Space + bidi separators U+001C..U+001F. The
    // non-ASCII spaces NBSP (U+00A0) and line/para separator (U+2028/9)
    // are space; ZWSP (U+200B) is NOT.
    #[test]
    fn test_isspace_python_semantics_912() {
        let a = array(&["\u{00A0}", "\u{2028}", "\u{1C}", "\u{200B}", " ", "a b", ""]).unwrap();
        let r = isspace(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, true, false, true, false, false]
        );
    }

    // --- Divergence pins for crosslink #1044/#1045 (isnumeric/isalnum
    // backed by a Unicode-15.1.0 `Numeric_Type` table instead of Rust's
    // `char::is_numeric`). Expected values are the live results of CPython
    // 3.13 / Unicode 15.1.0 `str.isnumeric()`/`str.isalnum()`. ---

    // isnumeric: CJK ideographic numerals have `Numeric_Type=Numeric` but
    // general category `Lo`, so Rust's `char::is_numeric` (`Nd|Nl|No`)
    // MISSES them. The oracle CPython 15.1.0 `chr(0x4E00).isnumeric()` is
    // True. '一' (U+4E00), '九' (U+4E5D), '〇' (U+3007, Nl) and roman 'Ⅻ'
    // (U+216B, Nl) are numeric; the letter 'A' is not.
    #[test]
    fn test_isnumeric_cjk_ideographic_numerals_1044() {
        let a = array(&["\u{4E00}", "\u{4E5D}", "\u{3007}", "\u{216B}", "A", ""]).unwrap();
        let r = isnumeric(&a).unwrap();
        assert_eq!(
            r.as_slice().unwrap(),
            &[true, true, true, true, false, false]
        );
    }

    // isnumeric: U+10D40 (Garay digit) was assigned AFTER Unicode 15.1.0,
    // so the oracle CPython 15.1.0 `chr(0x10D40).isnumeric()` is False,
    // but Rust's newer `char::is_numeric` tables return True (the
    // false-positive divergence). The 15.1.0-locked table must say False.
    #[test]
    fn test_isnumeric_post_15_1_codepoint_false_1044() {
        let a = array(&["\u{10D40}"]).unwrap();
        let r = isnumeric(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[false]);
    }

    // isalnum inherits the isnumeric fix: U+10D40 is no longer wrongly
    // numeric, so it is no longer wrongly alphanumeric. '一' (numeric) and
    // 'a' (alpha) ARE alnum; "a一" mixing alpha+numeric is alnum, but any
    // post-15.1.0 codepoint disqualifies the string.
    #[test]
    fn test_isalnum_drops_post_15_1_false_positive_1045() {
        let a = array(&[
            "\u{10D40}",
            "\u{4E00}",
            "a",
            "a\u{4E00}",
            "a\u{10D40}",
        ])
        .unwrap();
        let r = isalnum(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[false, true, true, true, false]);
    }

    // The NUMERIC_TYPE_RANGES table must DIVERGE from Rust's
    // `char::is_numeric` at both extremes of the divergence, proving it is
    // a real 15.1.0-locked table and not a delegation to std: an `Lo`
    // ideographic numeral std misses, and a post-15.1.0 codepoint std
    // wrongly includes.
    //
    // NOTE: the final assertion presupposes a toolchain whose std Unicode
    // tables already include U+10D40; it fails on older toolchains.
    #[test]
    fn test_numeric_table_diverges_from_char_is_numeric() {
        // U+4E00 一: Numeric_Type=Numeric (Lo) -> table True, std False.
        assert!(is_numeric_char('\u{4E00}'));
        assert!(!'\u{4E00}'.is_numeric());
        // U+10D40: assigned after 15.1.0 -> table False, std True.
        assert!(!is_numeric_char('\u{10D40}'));
        assert!('\u{10D40}'.is_numeric());
    }

    // isalnum == isalpha || isdecimal || isdigit || isnumeric, per CPython
    // `str.isalnum`. Sample the composition across each contributing class:
    // alpha 'a', ideographic-numeric '一' (U+4E00), fraction '½' (U+00BD,
    // No/Numeric), circled digit '②' (U+2461, No/Digit), fullwidth digit
    // zero (U+FF10, decimal), and '.' (none).
    #[test]
    fn test_isalnum_composition_1045() {
        let samples = ["a", "\u{4E00}", "\u{00BD}", "\u{2461}", "\u{FF10}", "."];
        for s in samples {
            // Every sample is a single character, so the per-character
            // composition is also the expected whole-string result.
            let c = s.chars().next().unwrap();
            let composed =
                is_alpha_char(c) || is_decimal_char(c) || is_digit_char(c) || is_numeric_char(c);
            let a = array(&[s]).unwrap();
            let got = isalnum(&a).unwrap();
            assert_eq!(got.as_slice().unwrap(), &[composed], "isalnum({s:?})");
        }
    }
}