1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// A borrowed, unsized URI reference; the slice counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a `&str`
/// to be reinterpreted as a `&URIStr` in `URIStr::new`.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// The URI reference text (returned verbatim by `as_escaped_str`).
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
38 use Component::*;
39
40 let base = if self.is_absolute() {
41 Cow::Borrowed(self)
42 } else {
43 let mut base = self.to_owned();
44 base.normalize();
45 if let Some(frag) = base.uri.bytes().position(|b| b == b'#') {
46 base.uri.truncate(frag);
47 }
48 assert!(
49 base.is_absolute(),
50 "'{}' is not absolute",
51 base.as_escaped_str()
52 );
53 Cow::Owned(base)
54 };
55
56 let mut ref_components = reference.components().peekable();
57 if ref_components
58 .next_if(|comp| matches!(comp, Scheme(_)))
59 .is_some()
60 {
61 let mut ret = reference.to_owned();
62 ret.normalize();
63 return ret;
64 }
65
66 if ref_components
67 .next_if(|comp| matches!(comp, Authority { .. }))
68 .is_some()
69 {
70 let mut ret = URIString {
72 uri: [base.scheme().unwrap(), ":", &reference.uri].concat(),
73 };
74 ret.normalize();
75 return ret;
76 }
77
78 let mut components = base.components().peekable();
79 let mut uri = String::new();
80 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
81 uri.push_str(scheme);
82 uri.push(':');
83 }
84 if let Some(Authority {
85 userinfo,
86 host,
87 port,
88 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
89 {
90 uri.push_str("//");
91 if let Some(userinfo) = userinfo {
92 uri.push_str(userinfo);
93 uri.push(':');
94 }
95 uri.push_str(host);
96 if let Some(port) = port {
97 uri.push(':');
98 uri.push_str(port);
99 }
100 }
101
102 if ref_components
103 .next_if(|comp| matches!(comp, RootSegment))
104 .is_some()
105 {
106 uri.push_str(&reference.uri);
107 let mut ret = URIString { uri };
108 ret.normalize();
109 return ret;
110 }
111
112 let mut segments = vec![];
113 let has_root = components
114 .next_if(|comp| matches!(comp, RootSegment))
115 .is_some();
116 let mut has_dot_segment = false;
117 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
118 segments.push(segment);
119 has_dot_segment |= segment == "." || segment == "..";
120 }
121 if has_dot_segment {
122 segments = normalize_path_segments(segments.into_iter(), has_root);
123 }
124
125 let mut has_path = false;
126 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
127 let mut buf = vec![segment];
128 while let Some(Segment(segment)) =
129 ref_components.next_if(|comp| matches!(comp, Segment(_)))
130 {
131 buf.push(segment);
132 }
133 if buf.len() > 1 || !buf[0].is_empty() {
134 segments.pop();
135 segments.extend(buf);
136 has_path = true;
137 }
138 }
139 build_normalized_path(segments.into_iter(), has_root, &mut uri);
140
141 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
142 uri.push('?');
143 uri.push_str(query);
144 } else if !has_path
145 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
146 {
147 uri.push('?');
148 uri.push_str(query);
149 }
150
151 if let Some(Fragment(fragment)) = ref_components.next() {
152 uri.push('#');
153 uri.push_str(fragment);
154 }
155
156 URIString { uri }
157 }
158
159 pub fn as_escaped_str(&self) -> &str {
161 &self.uri
162 }
163
164 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
167 unescape(&self.uri).ok()
168 }
169
170 pub fn is_absolute(&self) -> bool {
173 self.scheme().is_some() && self.fragment().is_none()
174 }
175
176 pub fn is_relative(&self) -> bool {
179 self.scheme().is_none()
180 }
181
182 pub fn scheme(&self) -> Option<&str> {
185 let pos = self.uri.bytes().position(is_reserved)?;
186 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
187 }
188
189 pub fn authority(&self) -> Option<&str> {
192 let rem = self
193 .uri
194 .strip_prefix("//")
195 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
196 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
197 }
198
199 pub fn userinfo(&self) -> Option<&str> {
202 Some(self.authority()?.split_once('@')?.0)
203 }
204
205 pub fn host(&self) -> Option<&str> {
208 let mut auth = self.authority()?;
209 if let Some((_userinfo, rem)) = auth.split_once('@') {
210 auth = rem;
211 }
212 if let Some((host, port)) = auth.rsplit_once(':')
213 && port.bytes().all(|b| b.is_ascii_digit())
214 {
215 auth = host;
216 }
217 Some(auth)
218 }
219
220 pub fn port(&self) -> Option<&str> {
223 let (_, port) = self.authority()?.rsplit_once(':')?;
224 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
225 }
226
227 pub fn path(&self) -> &str {
230 let mut path = &self.uri;
231 if let Some(scheme) = self.scheme() {
232 path = &path[scheme.len() + 1..];
234 }
235 if let Some(rem) = path.strip_prefix("//") {
236 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
238 path = &rem[pos..]
239 }
240
241 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
242 }
243
244 pub fn query(&self) -> Option<&str> {
247 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
248 if self.uri.as_bytes()[pos] == b'#' {
249 return None;
250 }
251 let query = &self.uri[pos + 1..];
252 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
253 Some(&query[..pos])
254 }
255
256 pub fn fragment(&self) -> Option<&str> {
259 let pos = self.uri.bytes().position(|b| b == b'#')?;
260 Some(&self.uri[pos + 1..])
261 }
262
263 pub fn components(&self) -> Components<'_> {
265 Components::new(&self.uri)
266 }
267}
268
269impl ToOwned for URIStr {
270 type Owned = URIString;
271
272 fn to_owned(&self) -> Self::Owned {
273 URIString {
274 uri: self.uri.to_owned(),
275 }
276 }
277}
278
279impl From<&URIStr> for URIString {
280 fn from(value: &URIStr) -> Self {
281 value.to_owned()
282 }
283}
284
285impl AsRef<URIStr> for URIStr {
286 fn as_ref(&self) -> &URIStr {
287 self
288 }
289}
290
291impl Clone for Box<URIStr> {
292 fn clone(&self) -> Self {
293 self.as_ref().into()
294 }
295}
296
// Generates `From<&URIStr>` for each listed smart pointer type (`$t<URIStr>`).
// The text is first copied into `$t<str>`, which is then reinterpreted as
// `$t<URIStr>`.
macro_rules! impl_boxed_convertion_uri_str {
    ($( $t:ident ),*) => {
        $(
            impl From<&URIStr> for $t<URIStr> {
                fn from(value: &URIStr) -> Self {
                    let boxed: $t<str> = value.uri.into();
                    // SAFETY: `URIStr` is `#[repr(transparent)]` over `str`, so
                    // `$t<str>` and `$t<URIStr>` point at identically laid-out data.
                    unsafe {
                        std::mem::transmute(boxed)
                    }
                }
            }
        )*
    };
}
impl_boxed_convertion_uri_str!(Box, Rc, Arc);
315
/// An owned URI reference; the owning counterpart of [`URIStr`], to which it
/// dereferences.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// The URI reference text; `URIString::parse` stores the escaped form here.
    uri: String,
}
331
332impl URIString {
333 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
334 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
335 let uri = escape_except(uri, |b| {
336 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
337 });
338 let mut bytes = uri.as_bytes();
339 parse_uri_reference(&mut bytes)?;
340 if !bytes.is_empty() {
341 Err(ParseRIError::NotTermination)
342 } else {
343 Ok(URIString {
344 uri: uri.into_owned(),
345 })
346 }
347 }
348 _parse(uri.as_ref())
349 }
350
    /// Builds a URI from a file system path and parses it with [`URIString::parse`].
    ///
    /// A trailing `/` is appended when the path is an existing directory
    /// (`Path::is_dir`) or when the path text ends with a path separator, as
    /// checked below.
    ///
    /// # Errors
    ///
    /// Returns `ParseRIError::Unsupported` when a path component is not valid
    /// UTF-8, or whatever error `URIString::parse` reports for the built text.
    ///
    /// # Panics
    ///
    /// On targets that are neither `unix` nor `windows` the implementation is
    /// `todo!()` and always panics.
    pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
        #[cfg(target_family = "unix")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
            // Mark directories (and paths written with a trailing backslash) with a trailing '/'.
            if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            // Only absolute paths get the "file://" prefix; relative paths stay relative references.
            if path.is_absolute() {
                path_str.insert_str(0, "file://");
            }
            URIString::parse(path_str)
        }
        #[cfg(target_family = "windows")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            use std::path::{Component::*, Prefix::*};

            let mut path_str = String::new();
            // Set for verbatim prefixes; '/' inside their components is then
            // written as "%2F" instead of being treated as a separator.
            let mut verbatim = false;
            for comp in path.components() {
                match comp {
                    Prefix(prefix) => match prefix.kind() {
                        Verbatim(root) => {
                            path_str.push_str("file:///");
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimUNC(server, root) => {
                            // The server name becomes the URI authority.
                            path_str.push_str("file://");
                            path_str.push_str(
                                &server
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            path_str.push('/');
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimDisk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                            verbatim = true;
                        }
                        DeviceNS(device) => {
                            path_str.push_str("file:///");
                            path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        UNC(server, root) => {
                            // The server name becomes the URI authority.
                            path_str.push_str("file://");
                            path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
                            path_str.push('/');
                            path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        Disk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                        }
                    },
                    // The separator is written in front of each following component instead.
                    RootDir => {}
                    CurDir => path_str.push_str("/."),
                    ParentDir => path_str.push_str("/.."),
                    Normal(segment) => {
                        path_str.push('/');
                        let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
                        if verbatim {
                            path_str.push_str(&segment.replace('/', "%2F"));
                        } else {
                            path_str.push_str(segment);
                        }
                    }
                }
            }
            // Mark directories with a trailing '/'; a trailing '/' in the input
            // only counts as a separator for non-verbatim paths.
            if (path.is_dir()
                || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
                    || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            URIString::parse(path_str)
        }
        #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            todo!()
        }
        _parse_file_path(path.as_ref())
    }
457
458 pub fn into_boxed_uri_str(self) -> Box<URIStr> {
459 Box::from(self.as_ref())
460 }
461
462 pub fn normalize(&mut self) {
465 use Component::*;
466
467 let mut uri = String::with_capacity(self.uri.len());
468 let mut paths = vec![];
469 let mut query = None;
470 let mut fragment = None;
471 let mut has_root = false;
472 for comp in self.components() {
473 match comp {
474 Scheme(scheme) => {
475 uri.push_str(&scheme.to_ascii_lowercase());
476 uri.push(':');
477 }
478 Authority {
479 userinfo,
480 host,
481 port,
482 } => {
483 uri.push_str("//");
484 if let Some(userinfo) = userinfo {
485 uri.push_str(userinfo);
486 uri.push('@');
487 }
488 uri.push_str(host);
489 if let Some(port) = port {
490 uri.push(':');
491 uri.push_str(port);
492 }
493 }
494 RootSegment => has_root = true,
495 Segment(segment) => paths.push(segment),
496 Query(q) => query = Some(q),
497 Fragment(f) => fragment = Some(f),
498 }
499 }
500 build_normalized_path(paths.into_iter(), has_root, &mut uri);
501 if let Some(query) = query {
502 uri.push('?');
503 uri.push_str(query);
504 }
505 if let Some(fragment) = fragment {
506 uri.push('#');
507 uri.push_str(fragment);
508 }
509 self.uri = uri;
510 }
511}
512
513impl AsRef<URIStr> for URIString {
514 fn as_ref(&self) -> &URIStr {
515 URIStr::new(&self.uri)
516 }
517}
518
519impl Borrow<URIStr> for URIString {
520 fn borrow(&self) -> &URIStr {
521 self.as_ref()
522 }
523}
524
525impl Deref for URIString {
526 type Target = URIStr;
527
528 fn deref(&self) -> &Self::Target {
529 self.as_ref()
530 }
531}
532
// Generates `From<URIString>` for each listed type by delegating to that
// type's `From<&URIStr>` implementation (the text is copied, not moved).
macro_rules! impl_convertion_uri_string {
    ($( $t:ty ),*) => {
        $(
            impl From<URIString> for $t {
                fn from(value: URIString) -> $t {
                    From::from(value.as_ref())
                }
            }
        )*
    };
}
impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
545
546fn build_normalized_path<'a>(
547 segments: impl Iterator<Item = &'a str>,
548 has_root: bool,
549 buffer: &mut String,
550) {
551 let segments = normalize_path_segments(segments, has_root);
552 if has_root {
553 buffer.push('/');
554 }
555 for (i, seg) in segments.into_iter().enumerate() {
556 if i > 0 {
557 buffer.push('/');
558 }
559 buffer.push_str(seg);
560 }
561}
562
/// Removes `.` and `..` segments from a sequence of path segments.
///
/// * `.` is dropped.
/// * `..` removes the previous segment when there is one that is not itself
///   `..`; otherwise it is kept for rootless paths and dropped for rooted ones.
/// * When the last input segment is `.` or `..`, an empty segment is appended
///   so that the rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut kept: Vec<&'a str> = Vec::new();
    let mut ends_with_dot = false;

    for segment in segments {
        ends_with_dot = matches!(segment, "." | "..");
        match segment {
            "." => {}
            ".." => match kept.last().copied() {
                Some(previous) if previous != ".." => {
                    kept.pop();
                }
                // Cannot climb above the root.
                _ if has_root => {}
                _ => kept.push(segment),
            },
            _ => kept.push(segment),
        }
    }

    if ends_with_dot {
        kept.push("");
    }
    kept
}
592
593fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
600 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
601 parse_relative_ref(b)
604 } else {
605 if !b[0].is_ascii_alphabetic() {
609 parse_relative_ref(b)
612 } else {
613 if let Some(&c) = b
617 .iter()
618 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
619 && c == b':'
620 {
621 parse_uri(b)
622 } else {
623 parse_relative_ref(b)
624 }
625 }
626 }
627}
628
629fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
636 parse_scheme(b)?;
637 *b = b
638 .strip_prefix(b":")
639 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
640 parse_hier_part(b)?;
641 if let Some(query) = b.strip_prefix(b"?") {
642 *b = query;
643 parse_query(b)?;
644 }
645 if let Some(fragment) = b.strip_prefix(b"#") {
646 *b = fragment;
647 parse_fragment(b)?;
648 }
649 Ok(())
650}
651
652fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
659 if b.is_empty() || !b[0].is_ascii_alphabetic() {
660 return Err(ParseRIError::InvalidScheme);
661 }
662 let pos = b
663 .iter()
664 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
665 .unwrap_or(b.len());
666 *b = &b[pos..];
667 Ok(())
668}
669
670fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
680 if let Some(rem) = b.strip_prefix(b"/") {
681 if let Some(rem) = rem.strip_prefix(b"/") {
684 *b = rem;
688 parse_authority(b)?;
689 parse_path_abempty(b)
690 } else {
691 parse_path_absolute(b)
694 }
695 } else {
696 let mut dum = *b;
698 if parse_pchar(&mut dum).is_ok() {
699 parse_path_rootless(b)
701 } else {
702 Ok(())
706 }
707 }
708}
709
710fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
717 if b.starts_with(b"[") {
718 parse_ip_literal(b)?;
720 if let Some(rem) = b.strip_prefix(b":") {
721 *b = rem;
722 parse_port(b)?;
723 }
724 return Ok(());
725 }
726
727 let mut colon = usize::MAX;
753 let mut now = 0;
754 let mut t = *b;
755 while !t.is_empty() {
756 let pos = t
757 .iter()
758 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
759 .unwrap_or(t.len());
760 t = &t[pos..];
761 now += pos;
762 if let Some(rem) = t.strip_prefix(b":") {
763 now += 1;
764 t = rem;
765 colon = colon.min(now);
766 } else {
767 break;
768 }
769 }
770
771 debug_assert_eq!(now, b.len() - t.len());
772
773 if let Some(rem) = t.strip_prefix(b"@") {
774 *b = rem;
775 parse_host(b)?;
776 if let Some(rem) = b.strip_prefix(b":") {
777 *b = rem;
778 parse_port(b)?;
779 }
780 Ok(())
781 } else if t.starts_with(b"[") {
782 Err(ParseRIError::InvalidAuthority)
783 } else if colon < usize::MAX {
784 *b = &b[colon + 1..];
785 parse_port(b)
786 } else {
787 *b = t;
788 Ok(())
789 }
790}
791
792fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
810 if b.starts_with(b"[") {
811 parse_ip_literal(b)
812 } else {
813 parse_reg_name(b)
815 }
816}
817
818fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
825 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
826 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
827 parse_ipv_future(b)?;
828 } else {
829 parse_ipv6_address(b)?;
830 }
831 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
832 Ok(())
833}
834
835fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
842 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
843 return Err(ParseRIError::InvalidIPvFuture);
844 }
845 *b = &b[1..];
846 let pos = b
847 .iter()
848 .position(|&b| !b.is_ascii_hexdigit())
849 .unwrap_or(b.len());
850 if !(1..=b.len() - 2).contains(&pos) {
851 return Err(ParseRIError::InvalidIPvFuture);
852 }
853 *b = &b[pos..];
854 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
855 let pos = b
856 .iter()
857 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
858 .unwrap_or(b.len());
859 if pos == 0 {
860 return Err(ParseRIError::InvalidIPvFuture);
861 }
862 *b = &b[pos..];
863 Ok(())
864}
865
866fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
885 let mut cnt = 1;
886 let mut omit = false;
887 if let Some(rem) = b.strip_prefix(b":") {
888 *b = rem;
889 omit = true;
890 } else {
891 parse_h16(b)?;
892 }
893
894 while cnt + (omit as i32) < 8
895 && let Some(rem) = b.strip_prefix(b":")
896 {
897 *b = rem;
898 if b.starts_with(b":") {
899 if omit {
900 return Err(ParseRIError::InvalidIPv6address);
901 }
902 omit = true;
903 cnt += 1;
904 continue;
905 }
906
907 let mut dum = *b;
915 if parse_ipv4_address(&mut dum).is_ok() {
916 *b = dum;
917 cnt += 2;
919 break;
921 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
922 parse_h16(b)?;
923 }
924 }
925
926 if (omit && cnt <= 8) || (!omit && cnt == 8) {
929 Ok(())
930 } else {
931 Err(ParseRIError::InvalidIPv6address)
932 }
933}
934
935fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
943 let pos = b
944 .iter()
945 .position(|&b| !b.is_ascii_hexdigit())
946 .unwrap_or(b.len());
947 if pos == 0 {
948 Err(ParseRIError::InvalidH16)
949 } else {
950 *b = &b[pos.min(4)..];
951 Ok(())
952 }
953}
954
955fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
967 parse_dec_octet(b)?;
968 for _ in 0..3 {
969 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
970 parse_dec_octet(b)?;
971 }
972 Ok(())
973}
974fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
975 let len = match b {
976 [b'2', b'5', b'0'..=b'5', ..] => 3,
977 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
978 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
979 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
980 [b'0'..=b'9', ..] => 1,
981 _ => return Err(ParseRIError::InvalidDecOctet),
982 };
983 *b = &b[len..];
984 Ok(())
985}
986
987fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
994 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
997 Ok(())
998}
999
1000fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
1007 let pos = b
1008 .iter()
1009 .position(|&b| !b.is_ascii_digit())
1010 .unwrap_or(b.len());
1011 *b = &b[pos..];
1012 Ok(())
1013}
1014
1015fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1022 while let Some(rem) = b.strip_prefix(b"/") {
1023 *b = rem;
1024 parse_segment(b)?;
1025 }
1026 Ok(())
1027}
1028
1029fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
1036 *b = b
1037 .strip_prefix(b"/")
1038 .ok_or(ParseRIError::InvalidPathAbsolute)?;
1039 if parse_segment_nz(b).is_ok() {
1040 while let Some(rem) = b.strip_prefix(b"/") {
1041 *b = rem;
1042 parse_segment(b)?;
1043 }
1044 }
1045 Ok(())
1046}
1047
1048fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1055 parse_segment_nz_nc(b)?;
1056 while let Some(rem) = b.strip_prefix(b"/") {
1057 *b = rem;
1058 parse_segment(b)?;
1059 }
1060 Ok(())
1061}
1062
1063fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1070 parse_segment_nz(b)?;
1071 while let Some(rem) = b.strip_prefix(b"/") {
1072 *b = rem;
1073 parse_segment(b)?;
1074 }
1075 Ok(())
1076}
1077
1078fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1096 while parse_pchar(b).is_ok() {}
1097 Ok(())
1098}
1099
1100fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1107 parse_pchar(b)?;
1108 while parse_pchar(b).is_ok() {}
1109 Ok(())
1110}
1111
1112fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1120 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1121 return Err(ParseRIError::InvalidSegmentNzNc);
1122 }
1123 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1124 Ok(())
1125}
1126
1127fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1134 if b.is_empty() {
1135 return Err(ParseRIError::InvalidPChar);
1136 }
1137
1138 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1139 *b = &b[1..];
1140 Ok(())
1141 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1142 *b = &b[3..];
1143 Ok(())
1144 } else {
1145 Err(ParseRIError::InvalidPChar)
1146 }
1147}
1148
1149fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1156 loop {
1157 if let Some(rem) = b.strip_prefix(b"/") {
1158 *b = rem;
1159 } else if let Some(rem) = b.strip_prefix(b"?") {
1160 *b = rem;
1161 } else if parse_pchar(b).is_ok() {
1162 } else {
1164 break Ok(());
1165 }
1166 }
1167}
1168
1169fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1176 loop {
1177 if let Some(rem) = b.strip_prefix(b"/") {
1178 *b = rem;
1179 } else if let Some(rem) = b.strip_prefix(b"?") {
1180 *b = rem;
1181 } else if parse_pchar(b).is_ok() {
1182 } else {
1184 break Ok(());
1185 }
1186 }
1187}
1188
1189fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1196 parse_relative_part(b)?;
1197 if let Some(query) = b.strip_prefix(b"?") {
1198 *b = query;
1199 parse_query(b)?;
1200 }
1201 if let Some(fragment) = b.strip_prefix(b"#") {
1202 *b = fragment;
1203 parse_fragment(b)?;
1204 }
1205 Ok(())
1206}
1207
1208fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1218 if let Some(rem) = b.strip_prefix(b"/") {
1219 if let Some(rem) = rem.strip_prefix(b"/") {
1220 *b = rem;
1221 parse_authority(b)?;
1222 parse_path_abempty(b)
1223 } else {
1224 parse_path_absolute(b)
1225 }
1226 } else {
1227 let orig = b.len();
1228 let ret = parse_path_noscheme(b);
1229 if orig == b.len() { Ok(()) } else { ret }
1231 }
1232}
1233
1234fn is_reserved(b: u8) -> bool {
1241 is_gen_delims(b) || is_sub_delims(b)
1242}
1243
/// Returns `true` for the general delimiters `:`, `/`, `?`, `#`, `[`, `]`, `@`.
fn is_gen_delims(b: u8) -> bool {
    b":/?#[]@".contains(&b)
}
1253
/// Returns `true` for the sub-delimiters `!`, `$`, `&`, `'`, `(`, `)`, `*`,
/// `+`, `,`, `;`, `=`.
fn is_sub_delims(b: u8) -> bool {
    b"!$&'()*+,;=".contains(&b)
}
1266
/// Returns `true` for unreserved bytes: ASCII letters, digits, `-`, `.`, `_`, `~`.
fn is_unreserved(b: u8) -> bool {
    match b {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
        b'-' | b'.' | b'_' | b'~' => true,
        _ => false,
    }
}
1276
/// Lookup table of percent-escapes: bytes `3 * n .. 3 * n + 3` hold the
/// escape of byte value `n` (`%` followed by two upper-case hex digits).
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Every third byte is '%'; the two digits after it are filled in below.
    let mut table = [b'%'; 256 * 3];
    let mut value = 0usize;
    while value < 256 {
        table[3 * value + 1] = HEX_DIGITS[value >> 4];
        table[3 * value + 2] = HEX_DIGITS[value & 0xF];
        value += 1;
    }
    table
};
/// [`LUT_BYTES`] viewed as a string, so escapes can be appended to a `String`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` is filled only with `%` and the ASCII hex digits
    // `0-9` / `A-F`, so it is valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1299
1300pub fn escape(s: &str) -> Cow<'_, str> {
1301 escape_except(s, |_| false)
1302}
1303
1304pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1305 escape_bytes_except(b, |_| false)
1306}
1307
1308pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1309 let cap = s
1310 .chars()
1311 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1312 .sum::<usize>();
1313 if cap == 0 {
1314 return Cow::Borrowed(s);
1315 }
1316 let mut encode = [0; 6];
1317 let mut buf = String::with_capacity(s.len() + cap);
1318 for c in s.chars() {
1319 if is_except(c) {
1320 buf.push(c);
1321 } else {
1322 let encoded = c.encode_utf8(&mut encode);
1323 for b in encoded.bytes() {
1324 let index = b as usize * 3;
1325 buf.push_str(&LUT[index..index + 3]);
1326 }
1327 }
1328 }
1329 Cow::Owned(buf)
1330}
1331
1332pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1333 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1334 if cap == 0 {
1335 return Cow::Borrowed(b);
1336 }
1337 let mut buf = Vec::with_capacity(b.len() + cap);
1338 for &b in b {
1339 if is_except(b) {
1340 buf.push(b);
1341 } else {
1342 let index = b as usize * 3;
1343 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1344 }
1345 }
1346 Cow::Owned(buf)
1347}
1348
/// Error returned by [`unescape`] and [`unescape_bytes`].
pub enum URIUnescapeError {
    /// A `%` is not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8 (only produced by [`unescape`]).
    Utf8Error(std::str::Utf8Error),
}

impl From<std::str::Utf8Error> for URIUnescapeError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8Error(value)
    }
}

/// Returns the value of an ASCII hexadecimal digit, or `InvalidEscape` for any
/// other byte (including signs, which `from_str_radix` would accept).
fn hex_digit_value(digit: u8) -> Result<u8, URIUnescapeError> {
    match digit {
        b'0'..=b'9' => Ok(digit - b'0'),
        b'A'..=b'F' => Ok(digit - b'A' + 10),
        b'a'..=b'f' => Ok(digit - b'a' + 10),
        _ => Err(URIUnescapeError::InvalidEscape),
    }
}

/// Decodes the percent-escapes in `s`.
///
/// Runs of consecutive escapes are decoded together, so a multi-byte UTF-8
/// character may be spread over several escapes. Returns the input borrowed
/// when it contains no `%`.
///
/// # Errors
///
/// * `InvalidEscape` when a `%` is not followed by two hexadecimal digits.
/// * `Utf8Error` when a run of decoded bytes is not valid UTF-8.
pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
    if !s.contains('%') {
        return Ok(Cow::Borrowed(s));
    }

    let mut split = s.split('%');
    let mut buf = String::with_capacity(s.len());
    // `split` always yields at least one item: the text before the first '%'.
    buf.push_str(split.next().unwrap_or_default());
    // Decoded bytes of the current run of consecutive escapes.
    let mut bytes = vec![];
    for chunk in split {
        // Work on bytes: slicing the `str` at index 2 before validating would
        // panic when that index falls inside a multi-byte character.
        let byte = match chunk.as_bytes() {
            [hi, lo, ..] => (hex_digit_value(*hi)? << 4) | hex_digit_value(*lo)?,
            _ => return Err(URIUnescapeError::InvalidEscape),
        };
        bytes.push(byte);

        // Both digits are ASCII, so index 2 is a character boundary here.
        let literal = &chunk[2..];
        if !literal.is_empty() {
            buf.push_str(from_utf8(&bytes)?);
            buf.push_str(literal);
            bytes.clear();
        }
    }

    if !bytes.is_empty() {
        buf.push_str(from_utf8(&bytes)?);
    }
    Ok(Cow::Owned(buf))
}

/// Decodes the percent-escapes in `b`; all other bytes are copied unchanged.
/// Returns the input borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns `InvalidEscape` when a `%` is not followed by two hexadecimal digits.
pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
    if !b.contains(&b'%') {
        return Ok(Cow::Borrowed(b));
    }

    let mut split = b.split(|&b| b == b'%');
    let mut buf = Vec::with_capacity(b.len());
    // `split` always yields at least one item: the bytes before the first '%'.
    buf.extend_from_slice(split.next().unwrap_or_default());

    for chunk in split {
        let [hi, lo, literal @ ..] = chunk else {
            return Err(URIUnescapeError::InvalidEscape);
        };
        buf.push((hex_digit_value(*hi)? << 4) | hex_digit_value(*lo)?);
        // Keep the literal bytes that follow the escape.
        buf.extend_from_slice(literal);
    }
    Ok(Cow::Owned(buf))
}
1418
/// The component the [`Components`] iterator will try to extract next. States
/// are visited in declaration order; `Path` repeats once per path segment.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    Scheme,
    Authority,
    Root,
    Path,
    Query,
    Fragment,
    /// Everything has been yielded; the iterator returns `None` from now on.
    Finish,
}
1429
/// Iterator over the [`Component`]s of a URI reference, created by
/// `URIStr::components`.
pub struct Components<'a> {
    /// Which component is looked for next.
    state: DecomposeState,
    /// The part of the URI text that has not been consumed yet.
    uri: &'a str,
}
1434
1435impl Components<'_> {
1436 fn new(uri: &str) -> Components<'_> {
1437 Components {
1438 state: DecomposeState::Scheme,
1439 uri,
1440 }
1441 }
1442}
1443
1444impl<'a> Iterator for Components<'a> {
1445 type Item = Component<'a>;
1446
1447 fn next(&mut self) -> Option<Self::Item> {
1448 use DecomposeState::*;
1449 loop {
1450 match self.state {
1451 Scheme => {
1452 self.state = Authority;
1453 let mut bytes = self.uri.as_bytes();
1454 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1455 let len = self.uri.len() - bytes.len();
1456 let (scheme, rem) = self.uri.split_at(len);
1457 self.uri = &rem[1..];
1458 break Some(Component::Scheme(scheme));
1459 }
1460 }
1461 Authority => {
1462 self.state = Root;
1463 if let Some(rem) = self.uri.strip_prefix("//") {
1464 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1465 let (mut authority, rem) = rem.split_at(pos);
1466 self.uri = rem;
1467 let mut userinfo = None;
1468 if let Some((ui, rem)) = authority.split_once('@') {
1469 userinfo = Some(ui);
1470 authority = rem;
1471 }
1472 let mut port = None;
1473 if let Some((host, p)) = authority.rsplit_once(':')
1474 && p.bytes().all(|b| b.is_ascii_digit())
1475 {
1476 port = Some(p);
1477 authority = host;
1478 }
1479 break Some(Component::Authority {
1480 userinfo,
1481 host: authority,
1482 port,
1483 });
1484 }
1485 }
1486 Root => {
1487 self.state = Path;
1488 if let Some(rem) = self.uri.strip_prefix('/') {
1489 self.uri = rem;
1490 break Some(Component::RootSegment);
1491 }
1492 }
1493 Path => {
1494 let pos = self
1495 .uri
1496 .bytes()
1497 .position(|b| b == b'/' || b == b'?' || b == b'#')
1498 .unwrap_or(self.uri.len());
1499 let (segment, rem) = self.uri.split_at(pos);
1500 if let Some(rem) = rem.strip_prefix('/') {
1501 self.uri = rem;
1502 } else {
1503 self.uri = rem;
1504 self.state = Query;
1505 }
1506 break Some(Component::Segment(segment));
1507 }
1508 Query => {
1509 self.state = Fragment;
1510 if let Some(rem) = self.uri.strip_prefix('?') {
1511 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1512 let (query, rem) = rem.split_at(pos);
1513 self.uri = rem;
1514 break Some(Component::Query(query));
1515 }
1516 }
1517 Fragment => {
1518 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1519 self.state = Finish;
1520 if !self.uri.is_empty() {
1521 let (_, frag) = self.uri.split_at(1);
1522 self.uri = "";
1523 break Some(Component::Fragment(frag));
1524 }
1525 }
1526 Finish => break None,
1527 }
1528 }
1529 }
1530}
1531
/// One syntactic part of a URI reference, as yielded by [`Components`].
/// Delimiters (`:`, `//`, `@`, `/`, `?`, `#`) are not part of the payloads.
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, split into its parts.
    Authority {
        /// The text before `@`, if present.
        userinfo: Option<&'a str>,
        /// The host, without userinfo and port.
        host: &'a str,
        /// The all-digit text after the last `:`, if present.
        port: Option<&'a str>,
    },
    /// Marks a path that starts with `/`.
    RootSegment,
    /// One path segment (possibly empty).
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}