1use crate::{
2 imp::{AuthMeta, Constraints, HostMeta, Meta},
3 pct_enc::{table::*, Table, OCTET_TABLE_LO},
4 utf8,
5};
6use core::{
7 num::NonZeroUsize,
8 ops::{Deref, DerefMut},
9 str,
10};
11
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// Detailed cause of a [`ParseError`].
pub enum ParseErrorKind {
    /// A `%` that is not followed by two hexadecimal digits.
    ///
    /// The error index is the position of the `%`.
    InvalidPctEncodedOctet,
    /// A character (or the end of input) found where the parser could not
    /// accept it.
    ///
    /// The error index is usually the position at which reading stopped; a
    /// scheme that does not start with an ASCII letter is reported at index 0.
    UnexpectedChar,
    /// A bracketed host that started like an IPv6 address but is not a
    /// well-formed one.
    ///
    /// The error index is the position right after the opening `[`.
    InvalidIpv6Addr,
}
28
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
/// An error produced by the parser: where it happened and why.
pub struct ParseError {
    /// Byte index into the parsed input that the error refers to.
    pub(crate) index: usize,
    /// What went wrong at `index`.
    pub(crate) kind: ParseErrorKind,
}
35
impl ParseError {
    #[must_use]
    /// Returns the byte index into the input that this error refers to.
    pub fn index(&self) -> usize {
        self.index
    }

    #[must_use]
    /// Returns the detailed cause of this error.
    pub fn kind(&self) -> ParseErrorKind {
        self.kind
    }
}
49
// Implements the crate's `Error` trait for `ParseError` when the `impl-error`
// feature is enabled; no trait methods are overridden.
#[cfg(feature = "impl-error")]
impl crate::Error for ParseError {}
52
/// Result type used by every fallible routine in this file; the error is
/// always a `ParseError`.
type Result<T> = core::result::Result<T, crate::parse::ParseError>;
54
/// Returns from the enclosing function with `Err(ParseError)` built from the
/// given index expression and the name of a `ParseErrorKind` variant.
///
/// The expansion is a `return` expression, so it can be used both as a
/// statement and in expression position (e.g. as a `match` arm).
macro_rules! err {
    ($index:expr, $kind:ident) => {
        return Err(crate::parse::ParseError {
            index: $index,
            kind: crate::parse::ParseErrorKind::$kind,
        })
    };
}
64
65pub(crate) fn parse(bytes: &[u8], constraints: Constraints) -> Result<Meta> {
66 let mut parser = Parser {
67 constraints,
68 reader: Reader::new(bytes),
69 out: Meta::default(),
70 };
71 parser.parse_from_scheme()?;
72 Ok(parser.out)
73}
74
/// Parser state: the options in effect, the input cursor, and the output
/// being assembled.
///
/// Dereferences to its `Reader`, so reader methods and fields (`pos`,
/// `bytes`) are reachable directly on a `Parser`.
struct Parser<'a> {
    /// Options controlling parsing; this file reads `ascii_only` and
    /// `scheme_required`.
    constraints: Constraints,
    /// Cursor over the input bytes.
    reader: Reader<'a>,
    /// Component boundaries and host metadata recorded so far.
    out: Meta,
}
97
/// A forward-only cursor over a byte slice.
struct Reader<'a> {
    /// The whole input.
    bytes: &'a [u8],
    /// Index of the next unread byte; expected to stay `<= bytes.len()`
    /// (checked by a debug assertion in `skip`).
    pos: usize,
}
102
103impl<'a> Deref for Parser<'a> {
104 type Target = Reader<'a>;
105
106 fn deref(&self) -> &Self::Target {
107 &self.reader
108 }
109}
110
111impl DerefMut for Parser<'_> {
112 fn deref_mut(&mut self) -> &mut Self::Target {
113 &mut self.reader
114 }
115}
116
/// Which path rule `Parser::parse_from_path` should apply.
enum PathKind {
    /// Path following `scheme:` with no `//`; read from the current position
    /// with no extra restriction.
    General,
    /// Path following an authority; must be empty or start with `/`.
    AbEmpty,
    /// Path of an input without a scheme. The path starts at index 0 (any
    /// bytes already consumed while looking for a scheme belong to it) and
    /// its first segment must not contain `:`.
    ContinuedNoScheme,
}
122
/// One lexical piece of an IPv6 address, as produced by
/// `Reader::read_v6_segment`.
enum Seg {
    /// One to four hex digits (the group value), plus whether a single `:`
    /// was consumed right before them.
    Normal(u16, bool),
    /// A `::` ellipsis.
    Ellipsis,
    /// One to three hex digits followed by `.`, i.e. possibly the start of an
    /// embedded IPv4 address. The digits are left unconsumed; the flag says
    /// whether a single `:` was consumed right before them.
    MaybeV4(bool),
    /// A `:` followed by neither a hex digit nor another `:` (or by the end
    /// of input).
    SingleColon,
}
133
134impl<'a> Reader<'a> {
135 fn new(bytes: &'a [u8]) -> Self {
136 Reader { bytes, pos: 0 }
137 }
138
    /// Returns the total length of the input in bytes (not the number of
    /// bytes remaining).
    fn len(&self) -> usize {
        self.bytes.len()
    }
142
143 fn has_remaining(&self) -> bool {
144 self.pos < self.len()
145 }
146
147 fn peek(&self, i: usize) -> Option<u8> {
148 self.bytes.get(self.pos + i).copied()
149 }
150
    /// Advances the cursor by `n` bytes.
    ///
    /// The caller must make sure the cursor does not move past the end of the
    /// input; this is only verified by a debug assertion.
    fn skip(&mut self, n: usize) {
        self.pos += n;
        debug_assert!(self.pos <= self.len());
    }
157
158 fn read(&mut self, table: &Table) -> Result<bool> {
160 let start = self.pos;
161 self._read(table, |_, _| {})?;
162 Ok(self.pos > start)
163 }
164
165 fn _read(&mut self, table: &Table, mut f: impl FnMut(usize, u32)) -> Result<()> {
166 let mut i = self.pos;
167 let allow_pct_encoded = table.allows_pct_encoded();
168 let allow_non_ascii = table.allows_non_ascii();
169
170 while i < self.len() {
171 let x = self.bytes[i];
172 if allow_pct_encoded && x == b'%' {
173 let [hi, lo, ..] = self.bytes[i + 1..] else {
174 err!(i, InvalidPctEncodedOctet);
175 };
176 if !(HEXDIG.allows_ascii(hi) & HEXDIG.allows_ascii(lo)) {
177 err!(i, InvalidPctEncodedOctet);
178 }
179 i += 3;
180 } else if allow_non_ascii {
181 let (x, len) = utf8::next_code_point(self.bytes, i);
182 if !table.allows_code_point(x) {
183 break;
184 }
185 f(i, x);
186 i += len;
187 } else {
188 if !table.allows_ascii(x) {
189 break;
190 }
191 f(i, x as u32);
192 i += 1;
193 }
194 }
195
196 self.pos = i;
198 Ok(())
199 }
200
201 fn read_str(&mut self, s: &str) -> bool {
202 if self.bytes[self.pos..].starts_with(s.as_bytes()) {
203 self.skip(s.len());
205 true
206 } else {
207 false
208 }
209 }
210
    /// Reads an IPv6 address at the cursor and returns its eight 16-bit
    /// groups, or `None` if what was read is not a well-formed address.
    ///
    /// The cursor may already have advanced when `None` is returned.
    fn read_v6(&mut self) -> Option<[u16; 8]> {
        let mut segs = [0; 8];
        // Number of groups that preceded "::"; 8 means no "::" has been seen.
        let mut ellipsis_i = 8;

        // Number of groups stored in `segs` so far.
        let mut i = 0;
        while i < 8 {
            match self.read_v6_segment() {
                Some(Seg::Normal(seg, colon)) => {
                    // The first group and a group right after "::" must NOT have a
                    // leading ":"; every other group MUST have one.
                    if colon == (i == 0 || i == ellipsis_i) {
                        return None;
                    }
                    segs[i] = seg;
                    i += 1;
                }
                Some(Seg::Ellipsis) => {
                    // Only one "::" is allowed.
                    if ellipsis_i != 8 {
                        return None;
                    }
                    ellipsis_i = i;
                }
                Some(Seg::MaybeV4(colon)) => {
                    // An embedded IPv4 address needs two free groups, and needs a
                    // leading ":" unless it comes right after "::".
                    if i > 6 || colon == (i == ellipsis_i) {
                        return None;
                    }
                    let octets = self.read_v4()?.to_be_bytes();
                    segs[i] = u16::from_be_bytes([octets[0], octets[1]]);
                    segs[i + 1] = u16::from_be_bytes([octets[2], octets[3]]);
                    i += 2;
                    // The IPv4 part is always last.
                    break;
                }
                Some(Seg::SingleColon) => return None,
                // Nothing more that looks like part of the address.
                None => break,
            }
        }

        if ellipsis_i == 8 {
            // Without "::" all eight groups must be spelled out.
            if i != 8 {
                return None;
            }
        } else if i == 8 {
            // "::" together with eight groups would stand for nothing.
            return None;
        } else {
            // Move the groups written after "::" to the end of the array,
            // leaving zeros in the gap. Going backwards keeps overlapping
            // source/destination ranges from clobbering each other.
            for j in (ellipsis_i..i).rev() {
                segs[8 - (i - j)] = segs[j];
                segs[j] = 0;
            }
        }

        Some(segs)
    }
268
269 fn read_v6_segment(&mut self) -> Option<Seg> {
270 let colon = self.read_str(":");
271 if !self.has_remaining() {
272 return colon.then_some(Seg::SingleColon);
273 }
274
275 let first = self.peek(0).unwrap();
276 let mut x = match OCTET_TABLE_LO[first as usize] {
277 v if v < 128 => v as u16,
278 _ => {
279 return colon.then(|| {
280 if first == b':' {
281 self.skip(1);
283 Seg::Ellipsis
284 } else {
285 Seg::SingleColon
286 }
287 });
288 }
289 };
290 let mut i = 1;
291
292 while i < 4 {
293 let Some(b) = self.peek(i) else {
294 self.skip(i);
296 return None;
297 };
298 match OCTET_TABLE_LO[b as usize] {
299 v if v < 128 => {
300 x = (x << 4) | v as u16;
301 i += 1;
302 continue;
303 }
304 _ if b == b'.' => return Some(Seg::MaybeV4(colon)),
305 _ => break,
306 }
307 }
308 self.skip(i);
310 Some(Seg::Normal(x, colon))
311 }
312
313 fn read_v4(&mut self) -> Option<u32> {
314 let mut addr = self.read_v4_octet()? << 24;
315 for i in (0..3).rev() {
316 if !self.read_str(".") {
317 return None;
318 }
319 addr |= self.read_v4_octet()? << (i * 8);
320 }
321 Some(addr)
322 }
323
324 fn read_v4_octet(&mut self) -> Option<u32> {
325 let mut res = self.peek_digit(0)?;
326 if res == 0 {
327 self.skip(1);
329 return Some(0);
330 }
331
332 for i in 1..3 {
333 let Some(x) = self.peek_digit(i) else {
334 self.skip(i);
336 return Some(res);
337 };
338 res = res * 10 + x;
339 }
340 self.skip(3);
342
343 u8::try_from(res).is_ok().then_some(res)
344 }
345
346 fn peek_digit(&self, i: usize) -> Option<u32> {
347 self.peek(i).and_then(|x| (x as char).to_digit(10))
348 }
349
350 fn read_port(&mut self) {
351 if self.read_str(":") {
352 let mut i = 0;
353 while self.peek_digit(i).is_some() {
354 i += 1;
355 }
356 self.skip(i);
358 }
359 }
360
361 fn read_ip_literal(&mut self) -> Result<Option<HostMeta>> {
362 if !self.read_str("[") {
363 return Ok(None);
364 }
365
366 let start = self.pos;
367
368 let meta = if let Some(_addr) = self.read_v6() {
369 HostMeta::Ipv6(
370 #[cfg(feature = "net")]
371 _addr.into(),
372 )
373 } else if self.pos == start {
374 self.read_ipv_future()?;
375 HostMeta::IpvFuture
376 } else {
377 err!(start, InvalidIpv6Addr);
378 };
379
380 if !self.read_str("]") {
381 err!(self.pos, UnexpectedChar);
382 }
383 Ok(Some(meta))
384 }
385
386 fn read_ipv_future(&mut self) -> Result<()> {
387 if let Some(b'v' | b'V') = self.peek(0) {
388 self.skip(1);
390 if self.read(HEXDIG)? && self.read_str(".") && self.read(IPV_FUTURE)? {
391 return Ok(());
392 }
393 }
394 err!(self.pos, UnexpectedChar);
395 }
396}
397
398pub(crate) fn parse_v4_or_reg_name(bytes: &[u8]) -> HostMeta {
399 let mut reader = Reader::new(bytes);
400 match reader.read_v4() {
401 Some(_addr) if !reader.has_remaining() => HostMeta::Ipv4(
402 #[cfg(feature = "net")]
403 _addr.into(),
404 ),
405 _ => HostMeta::RegName,
406 }
407}
408
/// Parses the IPv6 address at the start of `bytes` into its eight groups.
///
/// # Panics
///
/// Panics if `Reader::read_v6` rejects the input.
///
/// NOTE(review): `read_v6_segment` returns `None` when the input ends before
/// a group of fewer than four digits is terminated, so callers presumably
/// pass bytes that still include a trailing delimiter such as `]`; confirm.
#[cfg(all(feature = "alloc", not(feature = "net")))]
pub(crate) fn parse_v6(bytes: &[u8]) -> [u16; 8] {
    let mut reader = Reader::new(bytes);
    let groups = reader.read_v6();
    groups.unwrap()
}
413
414impl Parser<'_> {
415 fn select<T>(&self, for_uri: T, for_iri: T) -> T {
416 if self.constraints.ascii_only {
417 for_uri
418 } else {
419 for_iri
420 }
421 }
422
423 fn read_v4_or_reg_name(&mut self) -> Result<HostMeta> {
424 let reg_name_table = self.select(REG_NAME, IREG_NAME);
425 Ok(match (self.read_v4(), self.read(reg_name_table)?) {
426 (Some(_addr), false) => HostMeta::Ipv4(
427 #[cfg(feature = "net")]
428 _addr.into(),
429 ),
430 _ => HostMeta::RegName,
431 })
432 }
433
434 fn read_host(&mut self) -> Result<HostMeta> {
435 match self.read_ip_literal()? {
436 Some(host) => Ok(host),
437 None => self.read_v4_or_reg_name(),
438 }
439 }
440
441 fn parse_from_scheme(&mut self) -> Result<()> {
442 self.read(SCHEME)?;
443
444 if self.peek(0) == Some(b':') {
445 if self.pos > 0 && self.bytes[0].is_ascii_alphabetic() {
447 self.out.scheme_end = NonZeroUsize::new(self.pos);
448 } else {
449 err!(0, UnexpectedChar);
450 }
451
452 self.skip(1);
454 return if self.read_str("//") {
455 self.parse_from_authority()
456 } else {
457 self.parse_from_path(PathKind::General)
458 };
459 } else if self.constraints.scheme_required {
460 err!(self.pos, UnexpectedChar);
461 } else if self.pos == 0 {
462 if self.read_str("//") {
464 return self.parse_from_authority();
465 }
466 }
467 self.parse_from_path(PathKind::ContinuedNoScheme)
469 }
470
471 fn parse_from_authority(&mut self) -> Result<()> {
472 let host;
473
474 let mut colon_cnt = 0;
475 let mut colon_i = 0;
476
477 let auth_start = self.pos;
478
479 let userinfo_table = self.select(USERINFO, IUSERINFO);
480 self._read(userinfo_table, |i, x| {
482 if x == ':' as u32 {
483 colon_cnt += 1;
484 colon_i = i;
485 }
486 })?;
487
488 if self.peek(0) == Some(b'@') {
489 self.skip(1);
492
493 let host_start = self.pos;
494 let meta = self.read_host()?;
495 host = (host_start, self.pos, meta);
496
497 self.read_port();
498 } else if self.pos == auth_start {
499 if let Some(meta) = self.read_ip_literal()? {
501 host = (auth_start, self.pos, meta);
502 self.read_port();
503 } else {
504 host = (self.pos, self.pos, HostMeta::RegName);
506 }
507 } else {
508 let host_end = match colon_cnt {
510 0 => self.pos,
512 1 => {
514 for i in colon_i + 1..self.pos {
515 if !self.bytes[i].is_ascii_digit() {
516 err!(i, UnexpectedChar);
517 }
518 }
519 colon_i
520 }
521 _ => err!(colon_i, UnexpectedChar),
523 };
524
525 let meta = parse_v4_or_reg_name(&self.bytes[auth_start..host_end]);
526 host = (auth_start, host_end, meta);
527 }
528
529 self.out.auth_meta = Some(AuthMeta {
530 host_bounds: (host.0, host.1),
531 host_meta: host.2,
532 });
533 self.parse_from_path(PathKind::AbEmpty)
534 }
535
536 fn parse_from_path(&mut self, kind: PathKind) -> Result<()> {
537 let path_table = self.select(PATH, IPATH);
538 self.out.path_bounds = match kind {
539 PathKind::General => {
540 let start = self.pos;
541 self.read(path_table)?;
542 (start, self.pos)
543 }
544 PathKind::AbEmpty => {
545 let start = self.pos;
546 if self.read(path_table)? && self.bytes[start] != b'/' {
548 err!(start, UnexpectedChar);
549 }
550 (start, self.pos)
551 }
552 PathKind::ContinuedNoScheme => {
553 let segment_table = self.select(SEGMENT_NZ_NC, ISEGMENT_NZ_NC);
554 self.read(segment_table)?;
555
556 if self.peek(0) == Some(b':') {
557 err!(self.pos, UnexpectedChar);
560 }
561
562 self.read(path_table)?;
563 (0, self.pos)
564 }
565 };
566
567 if self.read_str("?") {
568 let query_table = self.select(QUERY, IQUERY);
569 self.read(query_table)?;
570 self.out.query_end = NonZeroUsize::new(self.pos);
571 }
572
573 if self.read_str("#") {
574 let fragment_table = self.select(FRAGMENT, IFRAGMENT);
575 self.read(fragment_table)?;
576 }
577
578 if self.has_remaining() {
579 err!(self.pos, UnexpectedChar);
580 }
581 Ok(())
582 }
583}