fluent_uri/pct_enc/
table.rs1use crate::utf8;
9
// Layout of the lookup array: slots 0..=255 are indexed directly by byte
// value, and the three slots that follow are flags for whole character
// classes (percent-encoded octets, `ucschar`, `iprivate`).
const INDEX_PCT_ENCODED: usize = 256;
const INDEX_UCSCHAR: usize = INDEX_PCT_ENCODED + 1;
const INDEX_IPRIVATE: usize = INDEX_UCSCHAR + 1;
// One past the last flag slot.
const TABLE_LEN: usize = INDEX_IPRIVATE + 1;
14
/// Returns `true` if the scalar value `x` lies in one of the `ucschar` ranges.
///
/// The accepted ranges are `A0..=D7FF`, `F900..=FDCF`, `FDF0..=FFEF`, every
/// plane from `1_0000` through `D_FFFF` except the last two code points of
/// each plane, and `E_1000..=E_FFFD`.
///
/// NOTE(review): the ranges look like the `ucschar` rule of RFC 3987 — confirm
/// against the RFC before changing them.
const fn is_ucschar(x: u32) -> bool {
    match x {
        0xa0..=0xd7ff | 0xf900..=0xfdcf | 0xfdf0..=0xffef => true,
        // Planes 1-13: the low 16 bits must not be FFFE or FFFF.
        0x1_0000..=0xd_ffff => (x & 0xffff) <= 0xfffd,
        0xe_1000..=0xe_fffd => true,
        _ => false,
    }
}
20
/// Returns `true` if the scalar value `x` lies in one of the `iprivate` ranges.
///
/// The accepted ranges are `E000..=F8FF`, `F_0000..=F_FFFD` and
/// `10_0000..=10_FFFD`.
///
/// The supplementary-plane clause is bounded above by `10_FFFD`, so values
/// that are not Unicode code points at all (anything above `10_FFFF`) are
/// rejected instead of being accepted on the strength of their low 16 bits.
///
/// NOTE(review): the ranges look like the `iprivate` rule of RFC 3987 —
/// confirm against the RFC before changing them.
const fn is_iprivate(x: u32) -> bool {
    matches!(x, 0xe000..=0xf8ff)
        // Planes 15 and 16, minus the last two code points of each plane.
        || (x >= 0xf_0000 && x <= 0x10_fffd && (x & 0xffff) <= 0xfffd)
}
24
25#[derive(Clone, Copy, Debug)]
27pub struct Table {
28 table: [bool; TABLE_LEN],
29}
30
31impl Table {
32 #[must_use]
38 pub const fn new(mut bytes: &[u8]) -> Self {
39 let mut table = [false; TABLE_LEN];
40 while let [cur, rem @ ..] = bytes {
41 assert!(
42 cur.is_ascii() && *cur != b'%',
43 "cannot allow non-ASCII byte or %"
44 );
45 table[*cur as usize] = true;
46 bytes = rem;
47 }
48 Self { table }
49 }
50
51 #[must_use]
56 pub const fn or(mut self, other: &Self) -> Self {
57 let mut i = 0;
58 while i < TABLE_LEN {
59 self.table[i] |= other.table[i];
60 i += 1;
61 }
62 self
63 }
64
65 #[must_use]
67 pub const fn or_pct_encoded(mut self) -> Self {
68 self.table[INDEX_PCT_ENCODED] = true;
69 self
70 }
71
72 #[must_use]
77 pub const fn or_ucschar(mut self) -> Self {
78 self.table[INDEX_UCSCHAR] = true;
79 self
80 }
81
82 #[must_use]
87 pub const fn or_iprivate(mut self) -> Self {
88 self.table[INDEX_IPRIVATE] = true;
89 self
90 }
91
92 #[must_use]
97 pub const fn sub(mut self, other: &Self) -> Self {
98 let mut i = 0;
99 while i < TABLE_LEN {
100 self.table[i] &= !other.table[i];
101 i += 1;
102 }
103 self
104 }
105
106 #[must_use]
109 pub const fn is_subset(&self, other: &Self) -> bool {
110 let mut i = 0;
111 while i < TABLE_LEN {
112 if self.table[i] & !other.table[i] {
113 return false;
114 }
115 i += 1;
116 }
117 true
118 }
119
120 #[inline]
121 pub(crate) const fn allows_ascii(&self, x: u8) -> bool {
122 self.table[x as usize]
123 }
124
125 #[inline]
126 pub(crate) const fn allows_non_ascii(&self) -> bool {
127 self.table[INDEX_UCSCHAR] | self.table[INDEX_IPRIVATE]
128 }
129
130 pub(crate) const fn allows_code_point(&self, x: u32) -> bool {
131 if x < 128 {
132 self.table[x as usize]
133 } else {
134 (self.table[INDEX_UCSCHAR] && is_ucschar(x))
135 || (self.table[INDEX_IPRIVATE] && is_iprivate(x))
136 }
137 }
138
139 #[inline]
141 #[must_use]
142 pub const fn allows(&self, ch: char) -> bool {
143 self.allows_code_point(ch as u32)
144 }
145
146 #[inline]
148 #[must_use]
149 pub const fn allows_pct_encoded(&self) -> bool {
150 self.table[INDEX_PCT_ENCODED]
151 }
152
153 pub(crate) const fn validate(&self, s: &[u8]) -> bool {
155 let mut i = 0;
156 let allow_pct_encoded = self.allows_pct_encoded();
157 let allow_non_ascii = self.allows_non_ascii();
158
159 while i < s.len() {
160 let x = s[i];
161 if allow_pct_encoded && x == b'%' {
162 if i + 2 >= s.len() {
163 return false;
164 }
165 let (hi, lo) = (s[i + 1], s[i + 2]);
166
167 if !(HEXDIG.allows_ascii(hi) & HEXDIG.allows_ascii(lo)) {
168 return false;
169 }
170 i += 3;
171 } else if allow_non_ascii {
172 let (x, len) = utf8::next_code_point(s, i);
173 if !self.allows_code_point(x) {
174 return false;
175 }
176 i += len;
177 } else {
178 if !self.allows_ascii(x) {
179 return false;
180 }
181 i += 1;
182 }
183 }
184 true
185 }
186}
187
188const fn new(bytes: &[u8]) -> Table {
189 Table::new(bytes)
190}
191
192pub const ALPHA: &Table = &new(b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
196
197pub const DIGIT: &Table = &new(b"0123456789");
199
200pub const HEXDIG: &Table = &DIGIT.or(&new(b"ABCDEFabcdef"));
202
203pub const SCHEME: &Table = &ALPHA.or(DIGIT).or(&new(b"+-."));
205
206pub const USERINFO: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":")).or_pct_encoded();
208
209pub const IPV_FUTURE: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":"));
211
212pub const REG_NAME: &Table = &UNRESERVED.or(SUB_DELIMS).or_pct_encoded();
214
215pub const PATH: &Table = &PCHAR.or(&new(b"/"));
217
218pub const SEGMENT_NZ_NC: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b"@")).or_pct_encoded();
220
221pub const PCHAR: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":@")).or_pct_encoded();
223
224pub const QUERY: &Table = &PCHAR.or(&new(b"/?"));
226
227pub const FRAGMENT: &Table = QUERY;
229
230pub const UNRESERVED: &Table = &ALPHA.or(DIGIT).or(&new(b"-._~"));
232
233pub const RESERVED: &Table = &GEN_DELIMS.or(SUB_DELIMS);
235
236pub const GEN_DELIMS: &Table = &new(b":/?#[]@");
238
239pub const SUB_DELIMS: &Table = &new(b"!$&'()*+,;=");
242
243pub const IUSERINFO: &Table = &USERINFO.or_ucschar();
246pub const IREG_NAME: &Table = ®_NAME.or_ucschar();
247pub const IPATH: &Table = &PATH.or_ucschar();
248pub const ISEGMENT_NZ_NC: &Table = &SEGMENT_NZ_NC.or_ucschar();
249pub const IQUERY: &Table = &QUERY.or_ucschar().or_iprivate();
250pub const IFRAGMENT: &Table = &FRAGMENT.or_ucschar();