1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//! Named constants for Unicode codepoint ranges used throughout the library.
//!
//! Centralising these ranges prevents magic numbers from being duplicated
//! across modules and makes future Unicode version updates easier to apply.
// ── CJK ──────────────────────────────────────────────────────────────────────
/// CJK Unified Ideographs (U+4E00–U+9FFF)
pub const CJK_UNIFIED: RangeInclusive = 0x4E00..=0x9FFF;
/// CJK Extension A (U+3400–U+4DBF)
pub const CJK_EXT_A: RangeInclusive = 0x3400..=0x4DBF;
/// CJK Extension A + Unified combined range used for capacity estimation
/// (U+3000–U+9FFF, covers CJK Symbols, kana, and Unified Ideographs)
pub const CJK_CAPACITY_RANGE: RangeInclusive = 0x3000..=0x9FFF;
/// CJK Compatibility Ideographs (U+F900–U+FAFF)
pub const CJK_COMPAT: RangeInclusive = 0xF900..=0xFAFF;
// ── Hangul ────────────────────────────────────────────────────────────────────
/// Hangul Syllables (U+AC00–U+D7AF)
pub const HANGUL_SYLLABLES: RangeInclusive = 0xAC00..=0xD7AF;
/// Hangul Compatibility Jamo (U+3131–U+3163)
pub const HANGUL_COMPAT_JAMO: RangeInclusive = 0x3131..=0x3163;
// ── Kana ──────────────────────────────────────────────────────────────────────
/// Hiragana block (U+3040–U+309F)
pub const HIRAGANA: RangeInclusive = 0x3040..=0x309F;
/// Katakana block (U+30A0–U+30FF)
pub const KATAKANA: RangeInclusive = 0x30A0..=0x30FF;
/// Half-width Katakana (U+FF65–U+FF9F)
pub const KATAKANA_HALFWIDTH: RangeInclusive = 0xFF65..=0xFF9F;
// ── Indic (Brahmic) ─────────────────────────────────────────────────────────
/// Indic Brahmic scripts: Devanagari through Sinhala (U+0900–U+0DFF)
pub const INDIC: RangeInclusive = 0x0900..=0x0DFF;
// ── Tibetan ─────────────────────────────────────────────────────────────────
/// Tibetan block (U+0F00–U+0FFF)
pub const TIBETAN: RangeInclusive = 0x0F00..=0x0FFF;
// ── Myanmar ─────────────────────────────────────────────────────────────────
/// Myanmar block (U+1000–U+109F)
pub const MYANMAR: RangeInclusive = 0x1000..=0x109F;
// ── Khmer ───────────────────────────────────────────────────────────────────
/// Khmer block (U+1780–U+17FF)
pub const KHMER: RangeInclusive = 0x1780..=0x17FF;
// ── Balinese ──────────────────────────────────────────────────────────────
/// Balinese block (U+1B00–U+1B7F)
pub const BALINESE: RangeInclusive = 0x1B00..=0x1B7F;
// ── Javanese ──────────────────────────────────────────────────────────────
/// Javanese block (U+A980–U+A9DF)
pub const JAVANESE: RangeInclusive = 0xA980..=0xA9DF;
// ── Sundanese ─────────────────────────────────────────────────────────────
/// Sundanese block (U+1B80–U+1BBF)
pub const SUNDANESE: RangeInclusive = 0x1B80..=0x1BBF;
// ── Tai Tham ──────────────────────────────────────────────────────────────
/// Tai Tham (Lanna) block (U+1A20–U+1AAF)
pub const TAI_THAM: RangeInclusive = 0x1A20..=0x1AAF;
// ── Cham ──────────────────────────────────────────────────────────────────
/// Cham block (U+AA00–U+AA5F)
pub const CHAM: RangeInclusive = 0xAA00..=0xAA5F;
// ── Batak ─────────────────────────────────────────────────────────────────
/// Batak block (U+1BC0–U+1BFF)
pub const BATAK: RangeInclusive = 0x1BC0..=0x1BFF;
// ── Buginese ──────────────────────────────────────────────────────────────
/// Buginese (Lontara) block (U+1A00–U+1A1F)
pub const BUGINESE: RangeInclusive = 0x1A00..=0x1A1F;
// ── Philippine scripts ───────────────────────────────────────────────────
/// Tagalog block (U+1700–U+171F)
pub const TAGALOG: RangeInclusive = 0x1700..=0x171F;
/// Hanunoo block (U+1720–U+173F)
pub const HANUNOO: RangeInclusive = 0x1720..=0x173F;
/// Buhid block (U+1740–U+175F)
pub const BUHID: RangeInclusive = 0x1740..=0x175F;
/// Tagbanwa block (U+1760–U+177F)
pub const TAGBANWA: RangeInclusive = 0x1760..=0x177F;
// ── Meetei Mayek ──────────────────────────────────────────────────────────
/// Meetei Mayek block (U+ABC0–U+ABFF)
pub const MEETEI_MAYEK: RangeInclusive = 0xABC0..=0xABFF;
/// Meetei Mayek Extensions block (U+AAE0–U+AAFF)
pub const MEETEI_MAYEK_EXT: RangeInclusive = 0xAAE0..=0xAAFF;