1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//! Unicode emoji metadata used by scanning and canonicalization.
//!
//! This module reports whether a given character has sanctioned text/emoji
//! variation sequences and which presentation (text or emoji) it defaults to.
//!
//! The actual data tables are generated at build time by `build.rs` and stored
//! in `unicode_data.rs` inside the build output directory. This module
//! includes them with [`include!`].
// Generated file included at compile time from build.rs output.
// Defines: VARIATION_ENTRIES, EMOJI_MODIFIERS, EMOJI_PRESENTATION_RANGES,
// EMOJI_RANGES, RI_RANGES.
// AUDIT NOTE: VARIATION_ENTRIES and EMOJI_MODIFIERS are sorted by code point
// (BTreeSet in build.rs guarantees this), required for binary search below.
// Range tables are sorted and non-overlapping.
include!;
/// Text presentation selector, U+FE0E (Unicode variation selector 15, VS15).
pub const TEXT_PRESENTATION_SELECTOR: char = '\u{FE0E}';
/// Emoji presentation selector, U+FE0F (Unicode variation selector 16, VS16).
pub const EMOJI_PRESENTATION_SELECTOR: char = '\u{FE0F}';
/// Combining enclosing keycap, U+20E3.
pub const COMBINING_ENCLOSING_KEYCAP: char = '\u{20E3}';
/// Zero-width joiner (ZWJ), U+200D.
pub const ZWJ: char = '\u{200D}';
// --- Inline Predicates ---
/// Returns true if the character is a tag character (U+E0020..U+E007F).
pub
// --- Table-Driven Predicates ---
/// Binary search a sorted, non-overlapping range table for `ch`.
///
/// We use a custom implementation instead of the standard library's
/// `binary_search_by` because this function needs to be `const` for
/// compile-time bitset construction in `variation_set.rs`.
const
/// Binary search a sorted character table for `ch`.
///
/// This is intentionally separate from [`variation_sequence_index`]: callers
/// that build compile-time data need a `const` predicate, while runtime callers
/// still benefit from the standard-library search returning an index.
const
/// The number of variation sequence entries.
///
/// Evaluated at compile time from the length of the generated
/// `VARIATION_ENTRIES` table.
pub const VARIATION_ENTRY_COUNT: usize = VARIATION_ENTRIES.len();
/// Return the variation sequence entry at `index`.
pub const
/// Return all code points with sanctioned text and/or emoji variation
/// sequences.
///
/// The returned iterator enumerates exactly the characters for which
/// [`has_variation_sequence`] returns true.
pub + Clone + 'static
/// Look up a character's sanctioned text and/or emoji variation sequence and,
/// if it has one, return its index in the table.
///
/// In this crate, this means the character appears in Unicode's
/// `emoji-variation-sequences.txt`.
pub
/// Return whether a character has a sanctioned text and/or emoji variation
/// sequence.
///
/// In this crate, this means the character appears in Unicode's
/// `emoji-variation-sequences.txt`.
pub
/// Return whether a variation-sequence character defaults to text
/// presentation in the pinned Unicode data.
///
/// UTS #51 §4, "Presentation Style", defines text-default characters as those
/// expected to have text presentation by default, while still allowing emoji
/// presentation. ED-8a and ED-9a say the only valid text/emoji presentation
/// sequences are those listed in `emoji-variation-sequences.txt`; within that
/// base universe, absence of `Emoji_Presentation` selects the text-default
/// side.
pub const
/// Return whether a variation-sequence character defaults to emoji
/// presentation in the pinned Unicode data.
///
/// UTS #51 §4, "Presentation Style", defines emoji-default characters as those
/// expected to have emoji presentation by default, while still allowing text
/// presentation. ED-8a and ED-9a say the only valid text/emoji presentation
/// sequences are those listed in `emoji-variation-sequences.txt`; within that
/// base universe, presence of `Emoji_Presentation` selects the emoji-default
/// side.
pub const
/// Return whether a character has the Unicode `Emoji_Modifier` property.
pub
/// Return whether a character has the Unicode `Emoji` property.
pub const
/// Return whether a character has the Unicode `Regional_Indicator` property.
pub const
// --- Tests ---