1#![cfg_attr(not(test), no_std)]
2
/// Returns `true` when `c` is a surrogate code point, i.e. lies in
/// `0xd800..=0xdfff`.
///
/// The two surrogate halves (`0xd800..=0xdbff` and `0xdc00..=0xdfff`) are
/// adjacent, so a single bounded comparison covers both.
///
/// The identifier is spelled `surrotate`; it is public API and is therefore
/// left unchanged.
pub const fn is_unicode_surrotate(c: u32) -> bool {
    const FIRST_SURROGATE: u32 = 0xd800;
    const LAST_SURROGATE: u32 = 0xdfff;

    FIRST_SURROGATE <= c && c <= LAST_SURROGATE
}
13
/// Predicates over control code points.
pub mod control {
    /// Code point matched by [`is_horizontal_tab`].
    const HORIZONTAL_TAB: u32 = 0x9;
    /// Code point matched by [`is_newline`].
    const LINE_FEED: u32 = 0xa;
    /// Code point matched by [`is_carriage_return`].
    const CARRIAGE_RETURN: u32 = 0xd;

    /// Returns `true` only for `0xa`.
    pub const fn is_newline(c: u32) -> bool {
        matches!(c, LINE_FEED)
    }

    /// Returns `true` only for `0xd`.
    pub const fn is_carriage_return(c: u32) -> bool {
        matches!(c, CARRIAGE_RETURN)
    }

    /// Returns `true` only for `0x9`.
    pub const fn is_horizontal_tab(c: u32) -> bool {
        matches!(c, HORIZONTAL_TAB)
    }

    /// Returns `true` for the three "useful" controls: newline, carriage
    /// return and horizontal tab.
    pub const fn is_useful_control(c: u32) -> bool {
        matches!(c, HORIZONTAL_TAB | LINE_FEED | CARRIAGE_RETURN)
    }

    /// Returns `true` for the C0 range `0x0..=0x1f`.
    pub const fn is_c0_control(c: u32) -> bool {
        c <= 0x1f
    }

    /// Returns `true` for the C1 range `0x80..=0x9f`.
    pub const fn is_c1_control(c: u32) -> bool {
        0x80 <= c && c <= 0x9f
    }

    /// Returns `true` for any C0 or C1 control that is not a useful control.
    ///
    /// `0x7f` falls in neither range, so this predicate does not report it.
    pub const fn is_legacy_control(c: u32) -> bool {
        let any_control = is_c0_control(c) || is_c1_control(c);
        any_control && !is_useful_control(c)
    }
}
58
59
60
61
/// Returns `true` when `c` is one of the 66 noncharacter code points.
///
/// Two groups are recognised:
/// * the contiguous run `0xfdd0..=0xfdef`, and
/// * the last two code points of every plane from 0 through 0x10, i.e. any
///   value up to `0x10ffff` whose low 16 bits are `0xfffe` or `0xffff`.
///
/// Values above `0x10ffff` are never reported as noncharacters.
pub const fn is_noncharacter(c: u32) -> bool {
    const LAST_CODE_POINT: u32 = 0x10ffff;

    let in_fdd0_run = 0xfdd0 <= c && c <= 0xfdef;
    // Masking off the lowest bit maps both 0x..fffe and 0x..ffff to 0xfffe.
    let ends_a_plane = c <= LAST_CODE_POINT && (c & 0xfffe) == 0xfffe;

    in_fdd0_run || ends_a_plane
}
85
86pub struct UnicodeScalars {}
89
90impl UnicodeScalars {
91 pub const fn contains(c: u32) -> bool {
92 !is_unicode_surrotate(c)
93 }
94}
95
96pub struct XmlCharacters {}
99
100impl XmlCharacters {
101 pub const fn contains(c: u32) -> bool {
102 !(control::is_c0_control(c)
103 && !control::is_useful_control(c))
104 && !is_unicode_surrotate(c)
105 && !matches!(c, 0xfffe..=0xffff)
106 }
107}
108
109pub struct UnicodeAssignables {}
111
112impl UnicodeAssignables {
113 pub const fn contains(c: u32) -> bool {
114 c != 0x7f && !(control::is_c0_control(c)
116 && !control::is_useful_control(c))
117 && !control::is_c1_control(c)
118 && !is_unicode_surrotate(c)
119 && !is_noncharacter(c)
120 }
121}
122
#[cfg(test)]
mod test {
    use super::*;
    use core::ops::RangeInclusive;

    /// Asserts that `p` returns `false` for every value yielded by
    /// `code_points`.
    ///
    /// Marked `#[track_caller]` so that, when called from another
    /// `#[track_caller]` function, the reported location is the test itself.
    #[track_caller]
    fn assert_all_excluded(p: fn(u32) -> bool, code_points: impl Iterator<Item = u32>) {
        for i in code_points {
            assert!(
                !p(i),
                "{}: {:x} should not be included but is",
                core::panic::Location::caller(),
                i
            );
        }
    }

    /// Asserts that `p` returns `true` for every value yielded by
    /// `code_points`.
    #[track_caller]
    fn assert_all_included(p: fn(u32) -> bool, code_points: impl Iterator<Item = u32>) {
        for u in code_points {
            assert!(
                p(u),
                "{}: {:x} should be included but isn't",
                core::panic::Location::caller(),
                u
            );
        }
    }

    /// Exhaustively checks `p` against `ranges` for every value from `0`
    /// through `char::MAX`.
    ///
    /// `p` must return `true` inside each range and `false` in the gaps
    /// before, between and after them. The gaps are derived from consecutive
    /// entries, so `ranges` is expected to be sorted ascending and
    /// non-overlapping.
    #[track_caller]
    fn assert_predicate(p: fn(u32) -> bool, ranges: &[RangeInclusive<u32>]) {
        // First code point that has not been checked yet.
        let mut next_unchecked = 0;
        for range in ranges {
            assert_all_excluded(p, next_unchecked..*range.start());
            assert_all_included(p, range.clone());
            next_unchecked = *range.end() + 1;
        }
        assert_all_excluded(p, next_unchecked..=(char::MAX as u32));
    }

    #[test]
    fn test_scalars() {
        let ranges = [
            0x0..=0xd7ff,
            0xe000..=0x10ffff,
        ];
        assert_predicate(UnicodeScalars::contains, &ranges);
    }

    #[test]
    fn test_xml() {
        let ranges = [
            0x9_u32..=0x9,
            0xa..=0xa,
            0xd..=0xd,
            0x20..=0xd7ff,
            0xe000..=0xfffd,
            0x10000..=0x10ffff,
        ];
        assert_predicate(XmlCharacters::contains, &ranges);
    }

    #[test]
    fn test_assignable() {
        let ranges = [
            0x9_u32..=0x9,
            0xa..=0xa,
            0xd..=0xd,
            0x20..=0x7e,
            0xa0..=0xd7ff,
            0xe000..=0xfdcf,
            0xfdf0..=0xfffd,
            0x10000..=0x1fffd,
            0x20000..=0x2fffd,
            0x30000..=0x3fffd,
            0x40000..=0x4fffd,
            0x50000..=0x5fffd,
            0x60000..=0x6fffd,
            0x70000..=0x7fffd,
            0x80000..=0x8fffd,
            0x90000..=0x9fffd,
            0xa0000..=0xafffd,
            0xb0000..=0xbfffd,
            0xc0000..=0xcfffd,
            0xd0000..=0xdfffd,
            0xe0000..=0xefffd,
            0xf0000..=0xffffd,
            0x100000..=0x10fffd,
        ];
        assert_predicate(UnicodeAssignables::contains, &ranges);
    }
}