1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Copyright 2023 The rust-ggstd authors. All rights reserved.
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
use ;
use ;
// Bit masks for each code point under U+0100, for fast lookup.
//
// Every basic property owns exactly one bit of the `u8`, so properties can be
// combined with `|` and tested with `&` without one property aliasing another.

/// A control character.
pub const P_C: u8 = 1 << 0;
/// A punctuation character.
pub const P_P: u8 = 1 << 1;
/// A numeral.
pub const P_N: u8 = 1 << 2;
/// A symbolic character.
pub const P_S: u8 = 1 << 3;
/// A spacing character.
pub const P_Z: u8 = 1 << 4;
/// An upper-case letter.
pub const P_LU: u8 = 1 << 5;
/// A lower-case letter.
pub const P_LL: u8 = 1 << 6;
/// A printable character according to Go's definition.
pub const PP: u8 = 1 << 7;
/// A graphical character according to the Unicode definition.
pub const PG: u8 = PP | P_Z;
/// A letter that is neither upper nor lower case (both case bits set).
pub const P_LO: u8 = P_LL | P_LU;
/// Mask selecting the letter bits; non-zero after `&` means "is a letter".
pub const P_LMASK: u8 = P_LO;
// // GraphicRanges defines the set of graphic characters according to Unicode.
// var GraphicRanges = []*RangeTable{
// L, M, N, P, S, Zs,
// }
/// PRINT_RANGES defines the set of printable characters according to Go.
/// ASCII space, U+0020, is handled separately.
// NOTE(review): the element type after the first `&` and the initializer after
// the second `&` are missing, so this declaration is incomplete as written.
// Presumably it should be a slice of the range tables for the printable
// categories — confirm against the table definitions before restoring it.
static PRINT_RANGES: & = &;
// // is_graphic reports whether the rune is defined as a Graphic by Unicode.
// // Such characters include letters, marks, numbers, punctuation, symbols, and
// // spaces, from categories L, M, N, P, S, Zs.
// fn is_graphic(r rune) bool {
// // We convert to uint32 to avoid the extra test for negative,
// // and in the index we convert to uint8 to avoid the range check.
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&pg != 0
// }
// return In(r, GraphicRanges...)
// }
/// is_print reports whether the rune is defined as printable by Go. Such
/// characters include letters, marks, numbers, punctuation, and symbols
/// (categories L, M, N, P, S), plus the ASCII space character. This
/// categorization is the same as is_graphic except that the only spacing
/// character is ASCII space, U+0020.
// // IsOneOf reports whether the rune is a member of one of the ranges.
// // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
// fn IsOneOf(ranges []*RangeTable, r rune) bool {
// for _, inside := range ranges {
// if Is(inside, r) {
// return true
// }
// }
// return false
// }
/// in_range reports whether the rune is a member of one of the ranges.
// // IsControl reports whether the rune is a control character.
// // The C (Other) Unicode category includes more code points
// // such as surrogates; use Is(C, r) to test for them.
// fn IsControl(r rune) bool {
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&pC != 0
// }
// // All control characters are < MaxLatin1.
// return false
// }
// // IsLetter reports whether the rune is a letter (category L).
// fn IsLetter(r rune) bool {
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&(pLmask) != 0
// }
// return isExcludingLatin(Letter, r)
// }
// // IsMark reports whether the rune is a mark character (category M).
// fn IsMark(r rune) bool {
// // There are no mark characters in Latin-1.
// return isExcludingLatin(Mark, r)
// }
// // IsNumber reports whether the rune is a number (category N).
// fn IsNumber(r rune) bool {
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&pN != 0
// }
// return isExcludingLatin(Number, r)
// }
// // IsPunct reports whether the rune is a Unicode punctuation character
// // (category P).
// fn IsPunct(r rune) bool {
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&pP != 0
// }
// return Is(Punct, r)
// }
// // IsSpace reports whether the rune is a space character as defined
// // by Unicode's White Space property; in the Latin-1 space
// // this is
// //
// // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
// //
// // Other definitions of spacing characters are set by category
// // Z and property Pattern_White_Space.
// fn IsSpace(r rune) bool {
// // This property isn't the same as Z; special-case it.
// if uint32(r) <= MaxLatin1 {
// switch r {
// case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
// return true
// }
// return false
// }
// return isExcludingLatin(White_Space, r)
// }
// // IsSymbol reports whether the rune is a symbolic character.
// fn IsSymbol(r rune) bool {
// if uint32(r) <= MaxLatin1 {
// return properties[uint8(r)]&pS != 0
// }
// return isExcludingLatin(Symbol, r)
// }