basic_text_internals/
text_utils.rs

1//! This file contains various utilities which are sensitive to the Unicode
2//! version. It is currently up to date with Unicode 15.1.0 and
3//! Unicode Text Segmentation revision 41.
4//!
5//! Ideally the major predicates in this file should be auto-generated from the
6//! Unicode data files rather than manually maintained.
7
8use crate::pre_normalization::PreNormalization;
9use crate::unicode::{is_normalization_form_starter, ESC, SUB, ZWJ};
10use std::cell::RefCell;
11use std::rc::Rc;
12use unicode_normalization::{is_nfc_stream_safe, is_nfc_stream_safe_quick, IsNormalized};
13
14/// Test whether `c` is a valid start value for a string in Basic Text.
15#[inline]
16pub fn is_basic_text_start(c: char) -> bool {
17    is_starter(c) &&
18    // https://unicode.org/reports/tr29/tr29-41.html#Grapheme_Cluster_Break_Property_Values
19    // ZWJ
20    c != ZWJ &&
21    // Extend
22    !is_grapheme_extend_not_cgj(c) &&
23    !is_emoji_modifier(c) &&
24    // SpacingMark (plus some Extend, which is redundant here)
25    !is_grapheme_cluster_break_spacing_mark_plus(c)
26}
27
28/// Test whether `c` is a valid end value for a string in Basic Text.
29#[inline]
30pub fn is_basic_text_end(c: char) -> bool {
31    // https://unicode.org/reports/tr29/tr29-41.html#Grapheme_Cluster_Break_Property_Values
32    // ZWJ
33    c != ZWJ &&
34    // Prepend
35    !indic_syllabic_category_consonant_preceding_repha(c) &&
36    !indic_syllabic_category_consonant_prefixed(c) &&
37    !prepended_concatenation_mark(c)
38}
39
40/// Test whether `s` is a valid string in Basic Text.
41#[inline]
42pub fn is_basic_text(s: &str) -> bool {
43    if let Some(c) = s.chars().next() {
44        if !is_basic_text_start(c) {
45            return false;
46        }
47    }
48    if let Some(c) = s.chars().next_back() {
49        if !is_basic_text_end(c) {
50            return false;
51        }
52    }
53
54    is_basic_text_substr(s)
55}
56
57/// Test whether `s` is a valid string in Basic Text.
58#[inline]
59pub fn is_basic_text_substr(s: &str) -> bool {
60    !s.chars()
61        .categorize(Rc::new(RefCell::new(None)))
62        .any(|c| matches!(c, SUB | ESC))
63        && is_nfc_stream_safe(s)
64}
65
66/// Test whether `s` is a valid string in Basic Text quickly, in a way that
67/// may return `None` if it can't be determined quickly.
68#[inline]
69pub fn is_basic_text_substr_quick(s: &str) -> Option<bool> {
70    if !s
71        .chars()
72        .categorize(Rc::new(RefCell::new(None)))
73        .any(|c| matches!(c, SUB | ESC))
74    {
75        return Some(false);
76    }
77
78    match is_nfc_stream_safe_quick(s.chars()) {
79        IsNormalized::Yes => Some(true),
80        IsNormalized::No => Some(false),
81        IsNormalized::Maybe => None,
82    }
83}
84
85#[inline]
86fn is_starter(c: char) -> bool {
87    // All ASCII values are starters and many of them are common, so
88    // add a fast-path optimization for this case.
89    c.is_ascii() || is_normalization_form_starter(c)
90}
91
/// `Grapheme_Extend = Yes`, except CGJ
///
/// Returns `true` when `c` lies in one of the code point ranges listed in
/// the body. U+034F (CGJ) is deliberately left out: the U+0300..U+036F run
/// is split into U+0300..=U+034E and U+0350..=U+036F around it.
///
/// The table is maintained by hand and must be refreshed whenever the
/// Unicode version this file tracks changes.
const fn is_grapheme_extend_not_cgj(c: char) -> bool {
    // Unicode 15.1.0, DerivedCoreProperties.txt
    //
    // NOTE(review): some adjacent ranges (for example U+0483..U+0487 and
    // U+0488..U+0489) are listed separately, presumably mirroring the
    // per-line layout of the data file; keeping them split makes the table
    // easier to diff against that file.
    matches!(
        c,
        '\u{300}'..='\u{34e}' // exclude U+34F (CGJ)
        | '\u{350}'..='\u{36f}'
        | '\u{483}'..='\u{487}'
        | '\u{488}'..='\u{489}'
        | '\u{591}'..='\u{5bd}'
        | '\u{5bf}'
        | '\u{5c1}'..='\u{5c2}'
        | '\u{5c4}'..='\u{5c5}'
        | '\u{5c7}'
        | '\u{610}'..='\u{61a}'
        | '\u{64b}'..='\u{65f}'
        | '\u{670}'
        | '\u{6d6}'..='\u{6dc}'
        | '\u{6df}'..='\u{6e4}'
        | '\u{6e7}'..='\u{6e8}'
        | '\u{6ea}'..='\u{6ed}'
        | '\u{711}'
        | '\u{730}'..='\u{74a}'
        | '\u{7a6}'..='\u{7b0}'
        | '\u{7eb}'..='\u{7f3}'
        | '\u{7fd}'
        | '\u{816}'..='\u{819}'
        | '\u{81b}'..='\u{823}'
        | '\u{825}'..='\u{827}'
        | '\u{829}'..='\u{82d}'
        | '\u{859}'..='\u{85b}'
        | '\u{898}'..='\u{89f}'
        | '\u{8ca}'..='\u{8e1}'
        | '\u{8e3}'..='\u{902}'
        | '\u{93a}'
        | '\u{93c}'
        | '\u{941}'..='\u{948}'
        | '\u{94d}'
        | '\u{951}'..='\u{957}'
        | '\u{962}'..='\u{963}'
        | '\u{981}'
        | '\u{9bc}'
        | '\u{9be}'
        | '\u{9c1}'..='\u{9c4}'
        | '\u{9cd}'
        | '\u{9d7}'
        | '\u{9e2}'..='\u{9e3}'
        | '\u{9fe}'
        | '\u{a01}'..='\u{a02}'
        | '\u{a3c}'
        | '\u{a41}'..='\u{a42}'
        | '\u{a47}'..='\u{a48}'
        | '\u{a4b}'..='\u{a4d}'
        | '\u{a51}'
        | '\u{a70}'..='\u{a71}'
        | '\u{a75}'
        | '\u{a81}'..='\u{a82}'
        | '\u{abc}'
        | '\u{ac1}'..='\u{ac5}'
        | '\u{ac7}'..='\u{ac8}'
        | '\u{acd}'
        | '\u{ae2}'..='\u{ae3}'
        | '\u{afa}'..='\u{aff}'
        | '\u{b01}'
        | '\u{b3c}'
        | '\u{b3e}'
        | '\u{b3f}'
        | '\u{b41}'..='\u{b44}'
        | '\u{b4d}'
        | '\u{b55}'..='\u{b56}'
        | '\u{b57}'
        | '\u{b62}'..='\u{b63}'
        | '\u{b82}'
        | '\u{bbe}'
        | '\u{bc0}'
        | '\u{bcd}'
        | '\u{bd7}'
        | '\u{c00}'
        | '\u{c04}'
        | '\u{c3c}'
        | '\u{c3e}'..='\u{c40}'
        | '\u{c46}'..='\u{c48}'
        | '\u{c4a}'..='\u{c4d}'
        | '\u{c55}'..='\u{c56}'
        | '\u{c62}'..='\u{c63}'
        | '\u{c81}'
        | '\u{cbc}'
        | '\u{cbf}'
        | '\u{cc2}'
        | '\u{cc6}'
        | '\u{ccc}'..='\u{ccd}'
        | '\u{cd5}'..='\u{cd6}'
        | '\u{ce2}'..='\u{ce3}'
        | '\u{d00}'..='\u{d01}'
        | '\u{d3b}'..='\u{d3c}'
        | '\u{d3e}'
        | '\u{d41}'..='\u{d44}'
        | '\u{d4d}'
        | '\u{d57}'
        | '\u{d62}'..='\u{d63}'
        | '\u{d81}'
        | '\u{dca}'
        | '\u{dcf}'
        | '\u{dd2}'..='\u{dd4}'
        | '\u{dd6}'
        | '\u{ddf}'
        | '\u{e31}'
        | '\u{e34}'..='\u{e3a}'
        | '\u{e47}'..='\u{e4e}'
        | '\u{eb1}'
        | '\u{eb4}'..='\u{ebc}'
        | '\u{ec8}'..='\u{ece}'
        | '\u{f18}'..='\u{f19}'
        | '\u{f35}'
        | '\u{f37}'
        | '\u{f39}'
        | '\u{f71}'..='\u{f7e}'
        | '\u{f80}'..='\u{f84}'
        | '\u{f86}'..='\u{f87}'
        | '\u{f8d}'..='\u{f97}'
        | '\u{f99}'..='\u{fbc}'
        | '\u{fc6}'
        | '\u{102d}'..='\u{1030}'
        | '\u{1032}'..='\u{1037}'
        | '\u{1039}'..='\u{103a}'
        | '\u{103d}'..='\u{103e}'
        | '\u{1058}'..='\u{1059}'
        | '\u{105e}'..='\u{1060}'
        | '\u{1071}'..='\u{1074}'
        | '\u{1082}'
        | '\u{1085}'..='\u{1086}'
        | '\u{108d}'
        | '\u{109d}'
        | '\u{135d}'..='\u{135f}'
        | '\u{1712}'..='\u{1714}'
        | '\u{1732}'..='\u{1733}'
        | '\u{1752}'..='\u{1753}'
        | '\u{1772}'..='\u{1773}'
        | '\u{17b4}'..='\u{17b5}'
        | '\u{17b7}'..='\u{17bd}'
        | '\u{17c6}'
        | '\u{17c9}'..='\u{17d3}'
        | '\u{17dd}'
        | '\u{180b}'..='\u{180d}'
        | '\u{180f}'
        | '\u{1885}'..='\u{1886}'
        | '\u{18a9}'
        | '\u{1920}'..='\u{1922}'
        | '\u{1927}'..='\u{1928}'
        | '\u{1932}'
        | '\u{1939}'..='\u{193b}'
        | '\u{1a17}'..='\u{1a18}'
        | '\u{1a1b}'
        | '\u{1a56}'
        | '\u{1a58}'..='\u{1a5e}'
        | '\u{1a60}'
        | '\u{1a62}'
        | '\u{1a65}'..='\u{1a6c}'
        | '\u{1a73}'..='\u{1a7c}'
        | '\u{1a7f}'
        | '\u{1ab0}'..='\u{1abd}'
        | '\u{1abe}'
        | '\u{1abf}'..='\u{1ace}'
        | '\u{1b00}'..='\u{1b03}'
        | '\u{1b34}'
        | '\u{1b35}'
        | '\u{1b36}'..='\u{1b3a}'
        | '\u{1b3c}'
        | '\u{1b42}'
        | '\u{1b6b}'..='\u{1b73}'
        | '\u{1b80}'..='\u{1b81}'
        | '\u{1ba2}'..='\u{1ba5}'
        | '\u{1ba8}'..='\u{1ba9}'
        | '\u{1bab}'..='\u{1bad}'
        | '\u{1be6}'
        | '\u{1be8}'..='\u{1be9}'
        | '\u{1bed}'
        | '\u{1bef}'..='\u{1bf1}'
        | '\u{1c2c}'..='\u{1c33}'
        | '\u{1c36}'..='\u{1c37}'
        | '\u{1cd0}'..='\u{1cd2}'
        | '\u{1cd4}'..='\u{1ce0}'
        | '\u{1ce2}'..='\u{1ce8}'
        | '\u{1ced}'
        | '\u{1cf4}'
        | '\u{1cf8}'..='\u{1cf9}'
        | '\u{1dc0}'..='\u{1dff}'
        | '\u{200c}'
        | '\u{20d0}'..='\u{20dc}'
        | '\u{20dd}'..='\u{20e0}'
        | '\u{20e1}'
        | '\u{20e2}'..='\u{20e4}'
        | '\u{20e5}'..='\u{20f0}'
        | '\u{2cef}'..='\u{2cf1}'
        | '\u{2d7f}'
        | '\u{2de0}'..='\u{2dff}'
        | '\u{302a}'..='\u{302d}'
        | '\u{302e}'..='\u{302f}'
        | '\u{3099}'..='\u{309a}'
        | '\u{a66f}'
        | '\u{a670}'..='\u{a672}'
        | '\u{a674}'..='\u{a67d}'
        | '\u{a69e}'..='\u{a69f}'
        | '\u{a6f0}'..='\u{a6f1}'
        | '\u{a802}'
        | '\u{a806}'
        | '\u{a80b}'
        | '\u{a825}'..='\u{a826}'
        | '\u{a82c}'
        | '\u{a8c4}'..='\u{a8c5}'
        | '\u{a8e0}'..='\u{a8f1}'
        | '\u{a8ff}'
        | '\u{a926}'..='\u{a92d}'
        | '\u{a947}'..='\u{a951}'
        | '\u{a980}'..='\u{a982}'
        | '\u{a9b3}'
        | '\u{a9b6}'..='\u{a9b9}'
        | '\u{a9bc}'..='\u{a9bd}'
        | '\u{a9e5}'
        | '\u{aa29}'..='\u{aa2e}'
        | '\u{aa31}'..='\u{aa32}'
        | '\u{aa35}'..='\u{aa36}'
        | '\u{aa43}'
        | '\u{aa4c}'
        | '\u{aa7c}'
        | '\u{aab0}'
        | '\u{aab2}'..='\u{aab4}'
        | '\u{aab7}'..='\u{aab8}'
        | '\u{aabe}'..='\u{aabf}'
        | '\u{aac1}'
        | '\u{aaec}'..='\u{aaed}'
        | '\u{aaf6}'
        | '\u{abe5}'
        | '\u{abe8}'
        | '\u{abed}'
        | '\u{fb1e}'
        | '\u{fe00}'..='\u{fe0f}'
        | '\u{fe20}'..='\u{fe2f}'
        | '\u{ff9e}'..='\u{ff9f}'
        | '\u{101fd}'
        | '\u{102e0}'
        | '\u{10376}'..='\u{1037a}'
        | '\u{10a01}'..='\u{10a03}'
        | '\u{10a05}'..='\u{10a06}'
        | '\u{10a0c}'..='\u{10a0f}'
        | '\u{10a38}'..='\u{10a3a}'
        | '\u{10a3f}'
        | '\u{10ae5}'..='\u{10ae6}'
        | '\u{10d24}'..='\u{10d27}'
        | '\u{10eab}'..='\u{10eac}'
        | '\u{10efd}'..='\u{10eff}'
        | '\u{10f46}'..='\u{10f50}'
        | '\u{10f82}'..='\u{10f85}'
        | '\u{11001}'
        | '\u{11038}'..='\u{11046}'
        | '\u{11070}'
        | '\u{11073}'..='\u{11074}'
        | '\u{1107f}'..='\u{11081}'
        | '\u{110b3}'..='\u{110b6}'
        | '\u{110b9}'..='\u{110ba}'
        | '\u{110c2}'
        | '\u{11100}'..='\u{11102}'
        | '\u{11127}'..='\u{1112b}'
        | '\u{1112d}'..='\u{11134}'
        | '\u{11173}'
        | '\u{11180}'..='\u{11181}'
        | '\u{111b6}'..='\u{111be}'
        | '\u{111c9}'..='\u{111cc}'
        | '\u{111cf}'
        | '\u{1122f}'..='\u{11231}'
        | '\u{11234}'
        | '\u{11236}'..='\u{11237}'
        | '\u{1123e}'
        | '\u{11241}'
        | '\u{112df}'
        | '\u{112e3}'..='\u{112ea}'
        | '\u{11300}'..='\u{11301}'
        | '\u{1133b}'..='\u{1133c}'
        | '\u{1133e}'
        | '\u{11340}'
        | '\u{11357}'
        | '\u{11366}'..='\u{1136c}'
        | '\u{11370}'..='\u{11374}'
        | '\u{11438}'..='\u{1143f}'
        | '\u{11442}'..='\u{11444}'
        | '\u{11446}'
        | '\u{1145e}'
        | '\u{114b0}'
        | '\u{114b3}'..='\u{114b8}'
        | '\u{114ba}'
        | '\u{114bd}'
        | '\u{114bf}'..='\u{114c0}'
        | '\u{114c2}'..='\u{114c3}'
        | '\u{115af}'
        | '\u{115b2}'..='\u{115b5}'
        | '\u{115bc}'..='\u{115bd}'
        | '\u{115bf}'..='\u{115c0}'
        | '\u{115dc}'..='\u{115dd}'
        | '\u{11633}'..='\u{1163a}'
        | '\u{1163d}'
        | '\u{1163f}'..='\u{11640}'
        | '\u{116ab}'
        | '\u{116ad}'
        | '\u{116b0}'..='\u{116b5}'
        | '\u{116b7}'
        | '\u{1171d}'..='\u{1171f}'
        | '\u{11722}'..='\u{11725}'
        | '\u{11727}'..='\u{1172b}'
        | '\u{1182f}'..='\u{11837}'
        | '\u{11839}'..='\u{1183a}'
        | '\u{11930}'
        | '\u{1193b}'..='\u{1193c}'
        | '\u{1193e}'
        | '\u{11943}'
        | '\u{119d4}'..='\u{119d7}'
        | '\u{119da}'..='\u{119db}'
        | '\u{119e0}'
        | '\u{11a01}'..='\u{11a0a}'
        | '\u{11a33}'..='\u{11a38}'
        | '\u{11a3b}'..='\u{11a3e}'
        | '\u{11a47}'
        | '\u{11a51}'..='\u{11a56}'
        | '\u{11a59}'..='\u{11a5b}'
        | '\u{11a8a}'..='\u{11a96}'
        | '\u{11a98}'..='\u{11a99}'
        | '\u{11c30}'..='\u{11c36}'
        | '\u{11c38}'..='\u{11c3d}'
        | '\u{11c3f}'
        | '\u{11c92}'..='\u{11ca7}'
        | '\u{11caa}'..='\u{11cb0}'
        | '\u{11cb2}'..='\u{11cb3}'
        | '\u{11cb5}'..='\u{11cb6}'
        | '\u{11d31}'..='\u{11d36}'
        | '\u{11d3a}'
        | '\u{11d3c}'..='\u{11d3d}'
        | '\u{11d3f}'..='\u{11d45}'
        | '\u{11d47}'
        | '\u{11d90}'..='\u{11d91}'
        | '\u{11d95}'
        | '\u{11d97}'
        | '\u{11ef3}'..='\u{11ef4}'
        | '\u{11f00}'..='\u{11f01}'
        | '\u{11f36}'..='\u{11f3a}'
        | '\u{11f40}'
        | '\u{11f42}'
        | '\u{13440}'
        | '\u{13447}'..='\u{13455}'
        | '\u{16af0}'..='\u{16af4}'
        | '\u{16b30}'..='\u{16b36}'
        | '\u{16f4f}'
        | '\u{16f8f}'..='\u{16f92}'
        | '\u{16fe4}'
        | '\u{1bc9d}'..='\u{1bc9e}'
        | '\u{1cf00}'..='\u{1cf2d}'
        | '\u{1cf30}'..='\u{1cf46}'
        | '\u{1d165}'
        | '\u{1d167}'..='\u{1d169}'
        | '\u{1d16e}'..='\u{1d172}'
        | '\u{1d17b}'..='\u{1d182}'
        | '\u{1d185}'..='\u{1d18b}'
        | '\u{1d1aa}'..='\u{1d1ad}'
        | '\u{1d242}'..='\u{1d244}'
        | '\u{1da00}'..='\u{1da36}'
        | '\u{1da3b}'..='\u{1da6c}'
        | '\u{1da75}'
        | '\u{1da84}'
        | '\u{1da9b}'..='\u{1da9f}'
        | '\u{1daa1}'..='\u{1daaf}'
        | '\u{1e000}'..='\u{1e006}'
        | '\u{1e008}'..='\u{1e018}'
        | '\u{1e01b}'..='\u{1e021}'
        | '\u{1e023}'..='\u{1e024}'
        | '\u{1e026}'..='\u{1e02a}'
        | '\u{1e08f}'
        | '\u{1e130}'..='\u{1e136}'
        | '\u{1e2ae}'
        | '\u{1e2ec}'..='\u{1e2ef}'
        | '\u{1e4ec}'..='\u{1e4ef}'
        | '\u{1e8d0}'..='\u{1e8d6}'
        | '\u{1e944}'..='\u{1e94a}'
        | '\u{e0020}'..='\u{e007f}'
        | '\u{e0100}'..='\u{e01ef}',
    )
}
476
/// `Emoji_Modifier = Yes`
///
/// Returns `true` for the five scalar values U+1F3FB through U+1F3FF.
const fn is_emoji_modifier(c: char) -> bool {
    // Unicode 15.1.0, emoji/emoji-data.txt
    let code_point = c as u32;
    matches!(code_point, 0x1F3FB..=0x1F3FF)
}
482
483/// `Grapheme_Cluster_Break = SpacingMark`, ignoring the
484/// `Grapheme_Cluster_Break ≠ Extend` rule, because it's redundant here.
485const fn is_grapheme_cluster_break_spacing_mark_plus(c: char) -> bool {
486    c == '\u{e33}'
487        || c == '\u{eb3}'
488        || (is_general_category_spacing_mark(c)
489            && !matches!(
490                    c,
491                    '\u{102b}'
492                    | '\u{102c}'
493                    | '\u{1038}'
494                    | '\u{1062}'..='\u{1064}'
495                    | '\u{1067}'..='\u{106d}'
496                    | '\u{1083}'
497                    | '\u{1087}'..='\u{108c}'
498                    | '\u{108f}'
499                    | '\u{109a}'..='\u{109c}'
500                    | '\u{1a61}'
501                    | '\u{1a63}'
502                    | '\u{1a64}'
503                    | '\u{aa7b}'
504                    | '\u{aa7d}'
505                    | '\u{11720}'
506                    | '\u{11721}',
507            ))
508}
509
/// `General_Category = Spacing_Mark`
///
/// Returns `true` when `c` lies in one of the code point ranges listed in
/// the body.
///
/// The table is maintained by hand and must be refreshed whenever the
/// Unicode version this file tracks changes.
const fn is_general_category_spacing_mark(c: char) -> bool {
    // Unicode 15.1.0, DerivedGeneralCategory.txt
    matches!(
        c,
        '\u{903}'
        | '\u{93b}'
        | '\u{93e}'..='\u{940}'
        | '\u{949}'..='\u{94c}'
        | '\u{94e}'..='\u{94f}'
        | '\u{982}'..='\u{983}'
        | '\u{9be}'..='\u{9c0}'
        | '\u{9c7}'..='\u{9c8}'
        | '\u{9cb}'..='\u{9cc}'
        | '\u{9d7}'
        | '\u{a03}'
        | '\u{a3e}'..='\u{a40}'
        | '\u{a83}'
        | '\u{abe}'..='\u{ac0}'
        | '\u{ac9}'
        | '\u{acb}'..='\u{acc}'
        | '\u{b02}'..='\u{b03}'
        | '\u{b3e}'
        | '\u{b40}'
        | '\u{b47}'..='\u{b48}'
        | '\u{b4b}'..='\u{b4c}'
        | '\u{b57}'
        | '\u{bbe}'..='\u{bbf}'
        | '\u{bc1}'..='\u{bc2}'
        | '\u{bc6}'..='\u{bc8}'
        | '\u{bca}'..='\u{bcc}'
        | '\u{bd7}'
        | '\u{c01}'..='\u{c03}'
        | '\u{c41}'..='\u{c44}'
        | '\u{c82}'..='\u{c83}'
        | '\u{cbe}'
        | '\u{cc0}'..='\u{cc4}'
        | '\u{cc7}'..='\u{cc8}'
        | '\u{cca}'..='\u{ccb}'
        | '\u{cd5}'..='\u{cd6}'
        | '\u{cf3}'
        | '\u{d02}'..='\u{d03}'
        | '\u{d3e}'..='\u{d40}'
        | '\u{d46}'..='\u{d48}'
        | '\u{d4a}'..='\u{d4c}'
        | '\u{d57}'
        | '\u{d82}'..='\u{d83}'
        | '\u{dcf}'..='\u{dd1}'
        | '\u{dd8}'..='\u{ddf}'
        | '\u{df2}'..='\u{df3}'
        | '\u{f3e}'..='\u{f3f}'
        | '\u{f7f}'
        | '\u{102b}'..='\u{102c}'
        | '\u{1031}'
        | '\u{1038}'
        | '\u{103b}'..='\u{103c}'
        | '\u{1056}'..='\u{1057}'
        | '\u{1062}'..='\u{1064}'
        | '\u{1067}'..='\u{106d}'
        | '\u{1083}'..='\u{1084}'
        | '\u{1087}'..='\u{108c}'
        | '\u{108f}'
        | '\u{109a}'..='\u{109c}'
        | '\u{1715}'
        | '\u{1734}'
        | '\u{17b6}'
        | '\u{17be}'..='\u{17c5}'
        | '\u{17c7}'..='\u{17c8}'
        | '\u{1923}'..='\u{1926}'
        | '\u{1929}'..='\u{192b}'
        | '\u{1930}'..='\u{1931}'
        | '\u{1933}'..='\u{1938}'
        | '\u{1a19}'..='\u{1a1a}'
        | '\u{1a55}'
        | '\u{1a57}'
        | '\u{1a61}'
        | '\u{1a63}'..='\u{1a64}'
        | '\u{1a6d}'..='\u{1a72}'
        | '\u{1b04}'
        | '\u{1b35}'
        | '\u{1b3b}'
        | '\u{1b3d}'..='\u{1b41}'
        | '\u{1b43}'..='\u{1b44}'
        | '\u{1b82}'
        | '\u{1ba1}'
        | '\u{1ba6}'..='\u{1ba7}'
        | '\u{1baa}'
        | '\u{1be7}'
        | '\u{1bea}'..='\u{1bec}'
        | '\u{1bee}'
        | '\u{1bf2}'..='\u{1bf3}'
        | '\u{1c24}'..='\u{1c2b}'
        | '\u{1c34}'..='\u{1c35}'
        | '\u{1ce1}'
        | '\u{1cf7}'
        | '\u{302e}'..='\u{302f}'
        | '\u{a823}'..='\u{a824}'
        | '\u{a827}'
        | '\u{a880}'..='\u{a881}'
        | '\u{a8b4}'..='\u{a8c3}'
        | '\u{a952}'..='\u{a953}'
        | '\u{a983}'
        | '\u{a9b4}'..='\u{a9b5}'
        | '\u{a9ba}'..='\u{a9bb}'
        | '\u{a9be}'..='\u{a9c0}'
        | '\u{aa2f}'..='\u{aa30}'
        | '\u{aa33}'..='\u{aa34}'
        | '\u{aa4d}'
        | '\u{aa7b}'
        | '\u{aa7d}'
        | '\u{aaeb}'
        | '\u{aaee}'..='\u{aaef}'
        | '\u{aaf5}'
        | '\u{abe3}'..='\u{abe4}'
        | '\u{abe6}'..='\u{abe7}'
        | '\u{abe9}'..='\u{abea}'
        | '\u{abec}'
        | '\u{11000}'
        | '\u{11002}'
        | '\u{11082}'
        | '\u{110b0}'..='\u{110b2}'
        | '\u{110b7}'..='\u{110b8}'
        | '\u{1112c}'
        | '\u{11145}'..='\u{11146}'
        | '\u{11182}'
        | '\u{111b3}'..='\u{111b5}'
        | '\u{111bf}'..='\u{111c0}'
        | '\u{111ce}'
        | '\u{1122c}'..='\u{1122e}'
        | '\u{11232}'..='\u{11233}'
        | '\u{11235}'
        | '\u{112e0}'..='\u{112e2}'
        | '\u{11302}'..='\u{11303}'
        | '\u{1133e}'..='\u{1133f}'
        | '\u{11341}'..='\u{11344}'
        | '\u{11347}'..='\u{11348}'
        | '\u{1134b}'..='\u{1134d}'
        | '\u{11357}'
        | '\u{11362}'..='\u{11363}'
        | '\u{11435}'..='\u{11437}'
        | '\u{11440}'..='\u{11441}'
        | '\u{11445}'
        | '\u{114b0}'..='\u{114b2}'
        | '\u{114b9}'
        | '\u{114bb}'..='\u{114be}'
        | '\u{114c1}'
        | '\u{115af}'..='\u{115b1}'
        | '\u{115b8}'..='\u{115bb}'
        | '\u{115be}'
        | '\u{11630}'..='\u{11632}'
        | '\u{1163b}'..='\u{1163c}'
        | '\u{1163e}'
        | '\u{116ac}'
        | '\u{116ae}'..='\u{116af}'
        | '\u{116b6}'
        | '\u{11720}'..='\u{11721}'
        | '\u{11726}'
        | '\u{1182c}'..='\u{1182e}'
        | '\u{11838}'
        | '\u{11930}'..='\u{11935}'
        | '\u{11937}'..='\u{11938}'
        | '\u{1193d}'
        | '\u{11940}'
        | '\u{11942}'
        | '\u{119d1}'..='\u{119d3}'
        | '\u{119dc}'..='\u{119df}'
        | '\u{119e4}'
        | '\u{11a39}'
        | '\u{11a57}'..='\u{11a58}'
        | '\u{11a97}'
        | '\u{11c2f}'
        | '\u{11c3e}'
        | '\u{11ca9}'
        | '\u{11cb1}'
        | '\u{11cb4}'
        | '\u{11d8a}'..='\u{11d8e}'
        | '\u{11d93}'..='\u{11d94}'
        | '\u{11d96}'
        | '\u{11ef5}'..='\u{11ef6}'
        | '\u{11f03}'
        | '\u{11f34}'..='\u{11f35}'
        | '\u{11f3e}'..='\u{11f3f}'
        | '\u{11f41}'
        | '\u{16f51}'..='\u{16f87}'
        | '\u{16ff0}'..='\u{16ff1}'
        | '\u{1d165}'..='\u{1d166}'
        | '\u{1d16d}'..='\u{1d172}',
    )
}
699
/// `Indic_Syllabic_Category = Consonant_Preceding_Repha`
///
/// Returns `true` for exactly four scalar values: U+0D4E, U+11941, U+11D46
/// and U+11F02.
const fn indic_syllabic_category_consonant_preceding_repha(c: char) -> bool {
    // Unicode 15.1.0, IndicSyllabicCategory.txt
    let code_point = c as u32;
    matches!(code_point, 0x0D4E | 0x11941 | 0x11D46 | 0x11F02)
}
705
/// `Indic_Syllabic_Category = Consonant_Prefixed`
///
/// Returns `true` for U+111C2..=U+111C3, U+1193F, U+11A3A and
/// U+11A84..=U+11A89.
const fn indic_syllabic_category_consonant_prefixed(c: char) -> bool {
    // Unicode 15.1.0, IndicSyllabicCategory.txt
    let code_point = c as u32;
    matches!(
        code_point,
        0x111C2..=0x111C3 | 0x1193F | 0x11A3A | 0x11A84..=0x11A89
    )
}
714
/// `Prepended_Concatenation_Mark = Yes`
///
/// Returns `true` for the scalar values carrying this property. They have
/// `Grapheme_Cluster_Break = Prepend`, so a Basic Text string must not end
/// with any of them.
const fn prepended_concatenation_mark(c: char) -> bool {
    // Unicode 15.1.0, PropList.txt
    matches!(
        c,
        '\u{600}'..='\u{605}'
            | '\u{6dd}'
            | '\u{70f}'
            // U+0890..U+0891 were added in Unicode 14.0 and must be listed
            // for this table to match the Unicode version the file tracks.
            | '\u{890}'..='\u{891}'
            | '\u{8e2}'
            | '\u{110bd}'
            | '\u{110cd}',
    )
}
723
/// Private-Use Area
///
/// Returns `true` for U+E000..=U+F8FF, U+F0000..=U+FFFFD and
/// U+100000..=U+10FFFD. The last two scalar values of planes 15 and 16
/// (U+xFFFE and U+xFFFF) are not included.
pub(crate) const fn is_private_use_area(c: char) -> bool {
    let code_point = c as u32;
    matches!(
        code_point,
        0xE000..=0xF8FF | 0xF0000..=0xFFFFD | 0x100000..=0x10FFFD
    )
}