1use unic_char_property::TotalCharProperty;
12
char_property! {
    /// The Unicode `General_Category` (`gc`) character property.
    ///
    /// Every variant declares three names: an abbreviated name (`abbr`), a long name (`long`)
    /// and a human-readable name (`human`). The variants fall into the major classes tested by
    /// the `is_*` predicates on this type: letters, marks, numbers, punctuation, symbols,
    /// separators, and "other".
    pub enum GeneralCategory {
        abbr => "gc";
        long => "General_Category";
        human => "General Category";

        /// An uppercase letter (`Lu`).
        UppercaseLetter {
            abbr => Lu,
            long => Uppercase_Letter,
            human => "Uppercase Letter",
        }

        /// A lowercase letter (`Ll`).
        LowercaseLetter {
            abbr => Ll,
            long => Lowercase_Letter,
            human => "Lowercase Letter",
        }

        /// A titlecase letter (`Lt`): a digraphic character whose first part is uppercase.
        TitlecaseLetter {
            abbr => Lt,
            long => Titlecase_Letter,
            human => "Titlecase Letter",
        }

        /// A modifier letter (`Lm`).
        ModifierLetter {
            abbr => Lm,
            long => Modifier_Letter,
            human => "Modifier Letter",
        }

        /// Any other letter (`Lo`), including syllables and ideographs.
        OtherLetter {
            abbr => Lo,
            long => Other_Letter,
            human => "Other Letter",
        }

        /// A nonspacing combining mark (`Mn`).
        NonspacingMark {
            abbr => Mn,
            long => Nonspacing_Mark,
            human => "Nonspacing Mark",
        }

        /// A spacing combining mark (`Mc`).
        SpacingMark {
            abbr => Mc,
            long => Spacing_Mark,
            human => "Spacing Mark",
        }

        /// An enclosing combining mark (`Me`).
        EnclosingMark {
            abbr => Me,
            long => Enclosing_Mark,
            human => "Enclosing Mark",
        }

        /// A decimal digit (`Nd`).
        DecimalNumber {
            abbr => Nd,
            long => Decimal_Number,
            human => "Decimal Digit",
        }

        /// A letterlike numeric character (`Nl`).
        LetterNumber {
            abbr => Nl,
            long => Letter_Number,
            human => "Letterlike Number",
        }

        /// A numeric character of any other type (`No`).
        OtherNumber {
            abbr => No,
            long => Other_Number,
            human => "Other Numeric",
        }

        /// A connecting punctuation mark (`Pc`), such as a tie.
        ConnectorPunctuation {
            abbr => Pc,
            long => Connector_Punctuation,
            human => "Connecting Punctuation",
        }

        /// A dash or hyphen punctuation mark (`Pd`).
        DashPunctuation {
            abbr => Pd,
            long => Dash_Punctuation,
            human => "Dash Punctuation",
        }

        /// An opening punctuation mark of a pair (`Ps`).
        OpenPunctuation {
            abbr => Ps,
            long => Open_Punctuation,
            human => "Opening Punctuation",
        }

        /// A closing punctuation mark of a pair (`Pe`).
        ClosePunctuation {
            abbr => Pe,
            long => Close_Punctuation,
            human => "Closing Punctuation",
        }

        /// An initial quotation mark (`Pi`).
        InitialPunctuation {
            abbr => Pi,
            long => Initial_Punctuation,
            human => "Initial Quotation",
        }

        /// A final quotation mark (`Pf`).
        FinalPunctuation {
            abbr => Pf,
            long => Final_Punctuation,
            human => "Final Quotation",
        }

        /// A punctuation mark of any other type (`Po`).
        OtherPunctuation {
            abbr => Po,
            long => Other_Punctuation,
            human => "Other Punctuation",
        }

        /// A symbol of mathematical use (`Sm`).
        MathSymbol {
            abbr => Sm,
            long => Math_Symbol,
            human => "Math Symbol",
        }

        /// A currency sign (`Sc`).
        CurrencySymbol {
            abbr => Sc,
            long => Currency_Symbol,
            human => "Currency Symbol",
        }

        /// A non-letterlike modifier symbol (`Sk`).
        ModifierSymbol {
            abbr => Sk,
            long => Modifier_Symbol,
            human => "Modifier Symbol",
        }

        /// A symbol of any other type (`So`).
        OtherSymbol {
            abbr => So,
            long => Other_Symbol,
            human => "Other Symbol",
        }

        /// A space character (`Zs`).
        SpaceSeparator {
            abbr => Zs,
            long => Space_Separator,
            human => "Space",
        }

        /// The line separator (`Zl`).
        LineSeparator {
            abbr => Zl,
            long => Line_Separator,
            human => "Line Separator",
        }

        /// The paragraph separator (`Zp`).
        ParagraphSeparator {
            abbr => Zp,
            long => Paragraph_Separator,
            human => "Paragraph Separator",
        }

        /// A control code (`Cc`).
        Control {
            abbr => Cc,
            long => Control,
            human => "Control",
        }

        /// A format control character (`Cf`).
        Format {
            abbr => Cf,
            long => Format,
            human => "Formatting",
        }

        /// A surrogate code point (`Cs`).
        Surrogate {
            abbr => Cs,
            long => Surrogate,
            human => "Surrogate",
        }

        /// A private-use character (`Co`).
        PrivateUse {
            abbr => Co,
            long => Private_Use,
            human => "Private-Use",
        }

        /// An unassigned code point (`Cn`); also the `Default` value of this type.
        Unassigned {
            abbr => Cn,
            long => Unassigned,
            human => "Unassigned",
        }
    }

    // Module exposing the categories under their abbreviated names (`Lu`, `Ll`, ...); the data
    // table and the `is_*` predicates in this file glob-import it.
    pub mod abbr_names for abbr;
    // Counterpart of `abbr_names` for the long names (`Uppercase_Letter`, ...).
    pub mod long_names for long;
}
239
240impl TotalCharProperty for GeneralCategory {
241 fn of(ch: char) -> Self {
242 Self::of(ch)
243 }
244}
245
246impl Default for GeneralCategory {
247 fn default() -> Self {
248 GeneralCategory::Unassigned
249 }
250}
251
/// Private home of the `General_Category` lookup table.
mod data {
    // The included table expression is expanded in this scope. The glob import presumably lets
    // it name categories by their abbreviations (`Lu`, `Ll`, ...) — confirm against the `.rsv`
    // file before removing it.
    use super::abbr_names::*;
    use unic_char_property::tables::CharDataTable;
    /// Table mapping characters to their category, pulled in at compile time from
    /// `../tables/general_category.rsv`.
    pub const GENERAL_CATEGORY_TABLE: CharDataTable<super::GeneralCategory> =
        include!("../tables/general_category.rsv");
}
258
259impl GeneralCategory {
260 pub fn of(ch: char) -> GeneralCategory {
262 data::GENERAL_CATEGORY_TABLE.find_or_default(ch)
263 }
264}
265
266impl GeneralCategory {
267 pub fn is_cased_letter(&self) -> bool {
269 use self::abbr_names::*;
270 matches!(*self, Lu | Ll | Lt)
271 }
272
273 pub fn is_letter(&self) -> bool {
275 use self::abbr_names::*;
276 matches!(*self, Lu | Ll | Lt | Lm | Lo)
277 }
278
279 pub fn is_mark(&self) -> bool {
281 use self::abbr_names::*;
282 matches!(*self, Mn | Mc | Me)
283 }
284
285 pub fn is_number(&self) -> bool {
287 use self::abbr_names::*;
288 matches!(*self, Nd | Nl | No)
289 }
290
291 pub fn is_punctuation(&self) -> bool {
293 use self::abbr_names::*;
294 matches!(*self, Pc | Pd | Ps | Pe | Pi | Pf | Po)
295 }
296
297 pub fn is_symbol(&self) -> bool {
299 use self::abbr_names::*;
300 matches!(*self, Sm | Sc | Sk | So)
301 }
302
303 pub fn is_separator(&self) -> bool {
305 use self::abbr_names::*;
306 matches!(*self, Zs | Zl | Zp)
307 }
308
309 pub fn is_other(&self) -> bool {
311 use self::abbr_names::*;
312 matches!(*self, Cc | Cf | Cs | Co | Cn)
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::GeneralCategory as GC;
    use core::char;
    use unic_char_property::EnumeratedCharProperty;

    /// Checks the category of every ASCII character from U+0000 through `~`.
    #[test]
    fn test_ascii() {
        // U+0000..=U+001F are control codes.
        for c in 0x00..=0x1F {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::Control);
        }

        assert_eq!(GC::of(' '), GC::SpaceSeparator);
        assert_eq!(GC::of('!'), GC::OtherPunctuation);
        assert_eq!(GC::of('"'), GC::OtherPunctuation);
        assert_eq!(GC::of('#'), GC::OtherPunctuation);
        assert_eq!(GC::of('$'), GC::CurrencySymbol);
        assert_eq!(GC::of('%'), GC::OtherPunctuation);
        assert_eq!(GC::of('&'), GC::OtherPunctuation);
        assert_eq!(GC::of('\''), GC::OtherPunctuation);
        assert_eq!(GC::of('('), GC::OpenPunctuation);
        assert_eq!(GC::of(')'), GC::ClosePunctuation);
        assert_eq!(GC::of('*'), GC::OtherPunctuation);
        assert_eq!(GC::of('+'), GC::MathSymbol);
        assert_eq!(GC::of(','), GC::OtherPunctuation);
        assert_eq!(GC::of('-'), GC::DashPunctuation);
        assert_eq!(GC::of('.'), GC::OtherPunctuation);
        assert_eq!(GC::of('/'), GC::OtherPunctuation);

        for c in ('0' as u32)..=('9' as u32) {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::DecimalNumber);
        }

        assert_eq!(GC::of(':'), GC::OtherPunctuation);
        assert_eq!(GC::of(';'), GC::OtherPunctuation);
        assert_eq!(GC::of('<'), GC::MathSymbol);
        assert_eq!(GC::of('='), GC::MathSymbol);
        assert_eq!(GC::of('>'), GC::MathSymbol);
        assert_eq!(GC::of('?'), GC::OtherPunctuation);
        assert_eq!(GC::of('@'), GC::OtherPunctuation);

        for c in ('A' as u32)..=('Z' as u32) {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::UppercaseLetter);
        }

        assert_eq!(GC::of('['), GC::OpenPunctuation);
        assert_eq!(GC::of('\\'), GC::OtherPunctuation);
        assert_eq!(GC::of(']'), GC::ClosePunctuation);
        assert_eq!(GC::of('^'), GC::ModifierSymbol);
        assert_eq!(GC::of('_'), GC::ConnectorPunctuation);
        assert_eq!(GC::of('`'), GC::ModifierSymbol);

        for c in ('a' as u32)..=('z' as u32) {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::LowercaseLetter);
        }

        assert_eq!(GC::of('{'), GC::OpenPunctuation);
        assert_eq!(GC::of('|'), GC::MathSymbol);
        assert_eq!(GC::of('}'), GC::ClosePunctuation);
        assert_eq!(GC::of('~'), GC::MathSymbol);
    }

    /// Checks code points at the very end of the Basic Multilingual Plane.
    #[test]
    fn test_bmp_edge() {
        // U+FEFF ZERO WIDTH NO-BREAK SPACE, also used as the byte order mark.
        let bom = '\u{FEFF}';
        assert_eq!(GC::of(bom), GC::Format);
        // U+FFFC OBJECT REPLACEMENT CHARACTER. Written as an escape: the raw character is
        // invisible and easily dropped by tooling, which leaves an empty (invalid) char literal.
        assert_eq!(GC::of('\u{FFFC}'), GC::OtherSymbol);
        // U+FFFD REPLACEMENT CHARACTER, escaped for the same reason.
        assert_eq!(GC::of('\u{FFFD}'), GC::OtherSymbol);

        for &c in [0xFFEF, 0xFFFE, 0xFFFF].iter() {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::Unassigned);
        }
    }

    /// Checks planes 15 and 16: private use throughout, except the last two code points of
    /// each plane, which are expected to be unassigned.
    #[test]
    fn test_private_use() {
        for c in 0xF_0000..=0xF_FFFD {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::PrivateUse);
        }

        for c in 0x10_0000..=0x10_FFFD {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::PrivateUse);
        }

        for &c in [0xF_FFFE, 0xF_FFFF, 0x10_FFFE, 0x10_FFFF].iter() {
            let c = char::from_u32(c).unwrap();
            assert_eq!(GC::of(c), GC::Unassigned);
        }
    }

    /// Abbreviated names come from the `abbr` entries of the property definition.
    #[test]
    fn test_abbr_name() {
        assert_eq!(GC::UppercaseLetter.abbr_name(), "Lu");
        assert_eq!(GC::Unassigned.abbr_name(), "Cn");
    }

    /// Long names come from the `long` entries of the property definition.
    #[test]
    fn test_long_name() {
        assert_eq!(GC::UppercaseLetter.long_name(), "Uppercase_Letter");
        assert_eq!(GC::Unassigned.long_name(), "Unassigned");
    }

    /// Human-readable names come from the `human` entries of the property definition.
    #[test]
    fn test_human_name() {
        assert_eq!(GC::UppercaseLetter.human_name(), "Uppercase Letter");
        assert_eq!(GC::Unassigned.human_name(), "Unassigned");
    }
}