1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// A borrowed, unsized URI reference: the slice counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows
/// `&str` to be reinterpreted as `&URIStr` by pointer cast.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// The URI text exactly as stored (returned by `as_escaped_str`).
    uri: str,
}
18
19impl URIStr {
    /// Reinterprets a `&str` as a `&URIStr` without copying or validating it.
    fn new(s: &str) -> &Self {
        // SAFETY: `URIStr` is `#[repr(transparent)]` over `str`, so the two
        // reference types share the same layout; the cast keeps the lifetime of `s`.
        unsafe {
            &*(s as *const str as *const Self)
        }
    }
28
29 pub fn resolve(&self, reference: &Self) -> URIString {
39 use Component::*;
40
41 let base = if self.is_absolute() {
42 Cow::Borrowed(self)
43 } else {
44 let mut base = self.to_owned();
45 base.normalize();
46 if let Some(frag) = base.uri.bytes().position(|b| b == b'#') {
47 base.uri.truncate(frag);
48 }
49 assert!(
50 base.is_absolute(),
51 "'{}' is not absolute",
52 base.as_escaped_str()
53 );
54 Cow::Owned(base)
55 };
56
57 let mut ref_components = reference.components().peekable();
58 if ref_components
59 .next_if(|comp| matches!(comp, Scheme(_)))
60 .is_some()
61 {
62 let mut ret = reference.to_owned();
63 ret.normalize();
64 return ret;
65 }
66
67 if ref_components
68 .next_if(|comp| matches!(comp, Authority { .. }))
69 .is_some()
70 {
71 let mut ret = URIString {
73 uri: [base.scheme().unwrap(), ":", &reference.uri].concat(),
74 };
75 ret.normalize();
76 return ret;
77 }
78
79 let mut components = base.components().peekable();
80 let mut uri = String::new();
81 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
82 uri.push_str(scheme);
83 uri.push(':');
84 }
85 if let Some(Authority {
86 userinfo,
87 host,
88 port,
89 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
90 {
91 uri.push_str("//");
92 if let Some(userinfo) = userinfo {
93 uri.push_str(userinfo);
94 uri.push(':');
95 }
96 uri.push_str(host);
97 if let Some(port) = port {
98 uri.push(':');
99 uri.push_str(port);
100 }
101 }
102
103 if ref_components
104 .next_if(|comp| matches!(comp, RootSegment))
105 .is_some()
106 {
107 uri.push_str(&reference.uri);
108 let mut ret = URIString { uri };
109 ret.normalize();
110 return ret;
111 }
112
113 let mut segments = vec![];
114 let has_root = components
115 .next_if(|comp| matches!(comp, RootSegment))
116 .is_some();
117 let mut has_dot_segment = false;
118 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
119 segments.push(segment);
120 has_dot_segment |= segment == "." || segment == "..";
121 }
122 if has_dot_segment {
123 segments = normalize_path_segments(segments.into_iter(), has_root);
124 }
125
126 let mut has_path = false;
127 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
128 let mut buf = vec![segment];
129 while let Some(Segment(segment)) =
130 ref_components.next_if(|comp| matches!(comp, Segment(_)))
131 {
132 buf.push(segment);
133 }
134 if buf.len() > 1 || !buf[0].is_empty() {
135 segments.pop();
136 segments.extend(buf);
137 has_path = true;
138 }
139 }
140 build_normalized_path(segments.into_iter(), has_root, &mut uri);
141
142 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
143 uri.push('?');
144 uri.push_str(query);
145 } else if !has_path
146 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
147 {
148 uri.push('?');
149 uri.push_str(query);
150 }
151
152 if let Some(Fragment(fragment)) = ref_components.next() {
153 uri.push('#');
154 uri.push_str(fragment);
155 }
156
157 URIString { uri }
158 }
159
    /// Returns the URI text exactly as stored, with percent-escapes left intact.
    pub fn as_escaped_str(&self) -> &str {
        &self.uri
    }
164
    /// Returns the URI with percent-escapes decoded via [`unescape`], or `None`
    /// when `unescape` reports an error.
    pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
        unescape(&self.uri).ok()
    }
171
    /// Returns `true` if the URI has a scheme and no fragment.
    pub fn is_absolute(&self) -> bool {
        self.scheme().is_some() && self.fragment().is_none()
    }
179
    /// Returns `true` if the URI has no scheme.
    pub fn is_relative(&self) -> bool {
        self.scheme().is_none()
    }
187
188 pub fn scheme(&self) -> Option<&str> {
193 let pos = self.uri.bytes().position(is_reserved)?;
194 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
195 }
196
197 pub fn authority(&self) -> Option<&str> {
202 let rem = self
203 .uri
204 .strip_prefix("//")
205 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
206 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
207 }
208
209 pub fn userinfo(&self) -> Option<&str> {
214 Some(self.authority()?.split_once('@')?.0)
215 }
216
217 pub fn host(&self) -> Option<&str> {
222 let mut auth = self.authority()?;
223 if let Some((_userinfo, rem)) = auth.split_once('@') {
224 auth = rem;
225 }
226 if let Some((host, port)) = auth.rsplit_once(':')
227 && port.bytes().all(|b| b.is_ascii_digit())
228 {
229 auth = host;
230 }
231 Some(auth)
232 }
233
234 pub fn port(&self) -> Option<&str> {
239 let (_, port) = self.authority()?.rsplit_once(':')?;
240 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
241 }
242
243 pub fn path(&self) -> &str {
248 let mut path = &self.uri;
249 if let Some(scheme) = self.scheme() {
250 path = &path[scheme.len() + 1..];
252 }
253 if let Some(rem) = path.strip_prefix("//") {
254 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
256 path = &rem[pos..]
257 }
258
259 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
260 }
261
262 pub fn query(&self) -> Option<&str> {
267 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
268 if self.uri.as_bytes()[pos] == b'#' {
269 return None;
270 }
271 let query = &self.uri[pos + 1..];
272 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
273 Some(&query[..pos])
274 }
275
276 pub fn fragment(&self) -> Option<&str> {
281 let pos = self.uri.bytes().position(|b| b == b'#')?;
282 Some(&self.uri[pos + 1..])
283 }
284
    /// Returns an iterator over the components of this URI, in order of appearance.
    pub fn components(&self) -> Components<'_> {
        Components::new(&self.uri)
    }
289}
290
/// Owned counterpart of [`URIStr`] is [`URIString`].
impl ToOwned for URIStr {
    type Owned = URIString;

    /// Copies the URI text into a freshly allocated `URIString`.
    fn to_owned(&self) -> Self::Owned {
        URIString {
            uri: self.uri.to_owned(),
        }
    }
}
300
/// Converts a borrowed URI into an owned one by copying its text.
impl From<&URIStr> for URIString {
    fn from(value: &URIStr) -> Self {
        value.to_owned()
    }
}
306
/// Identity conversion, so `URIStr` can be passed where `AsRef<URIStr>` is expected.
impl AsRef<URIStr> for URIStr {
    fn as_ref(&self) -> &URIStr {
        self
    }
}
312
/// Clones a boxed URI by converting the borrowed `&URIStr` into a new `Box<URIStr>`.
impl Clone for Box<URIStr> {
    fn clone(&self) -> Self {
        self.as_ref().into()
    }
}
318
319impl std::fmt::Display for URIStr {
320 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
321 write!(
322 f,
323 "{}",
324 self.as_unescaped_str()
325 .as_deref()
326 .unwrap_or(self.as_escaped_str())
327 )
328 }
329}
330
// Generates `From<&URIStr>` for each listed pointer type (`Box`, `Rc`, `Arc`):
// the text is first copied into a `$t<str>` and then reinterpreted as `$t<URIStr>`.
macro_rules! impl_boxed_convertion_uri_str {
    ($( $t:ident ),*) => {
        $(
            impl From<&URIStr> for $t<URIStr> {
                fn from(value: &URIStr) -> Self {
                    let boxed: $t<str> = value.uri.into();
                    // SAFETY: relies on `URIStr` being `#[repr(transparent)]` over
                    // `str`, so `$t<str>` and `$t<URIStr>` are assumed to share
                    // their representation.
                    // NOTE(review): a `from_raw(into_raw(..) as _)` round trip
                    // would avoid depending on the pointer types' layout.
                    unsafe {
                        std::mem::transmute(boxed)
                    }
                }
            }
        )*
    };
}
impl_boxed_convertion_uri_str!(Box, Rc, Arc);
349
/// An owned URI reference; dereferences to [`URIStr`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// The URI text in its escaped form.
    uri: String,
}
366
367impl URIString {
368 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
381 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
382 let uri = escape_except(uri, |b| {
383 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
384 });
385 URIString::parse_escaped(&uri)
386 }
387 _parse(uri.as_ref())
388 }
389
390 pub fn parse_system_id(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
416 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
417 let uri = escape_except(uri, |b| {
418 b.is_ascii()
420 && !matches!(
421 b as u8,
422 0..=0x1F
423 | 0x20
424 | 0x22
425 | 0x3C
426 | 0x3E
427 | 0x5C
428 | 0x5E
429 | 0x60
430 | 0x7B..=0x7D
431 | 0x7F..
432 )
433 });
434 URIString::parse_escaped(&uri)
435 }
436 _parse(uri.as_ref())
437 }
438
439 fn parse_escaped(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
447 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
448 let mut bytes = uri.as_bytes();
449 parse_uri_reference(&mut bytes)?;
450 if !bytes.is_empty() {
451 Err(ParseRIError::NotTermination)
452 } else {
453 Ok(URIString {
454 uri: uri.to_owned(),
455 })
456 }
457 }
458 _parse(uri.as_ref())
459 }
460
461 pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
467 #[cfg(target_family = "unix")]
468 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
469 let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
470 if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
471 && !path_str.ends_with('/')
472 {
473 path_str.push('/');
474 }
475 if path.is_absolute() {
476 path_str.insert_str(0, "file://");
477 }
478 URIString::parse(path_str)
479 }
480 #[cfg(target_family = "windows")]
481 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
482 use std::path::{Component::*, Prefix::*};
483
484 let mut path_str = String::new();
485 let mut verbatim = false;
486 for comp in path.components() {
487 match comp {
488 Prefix(prefix) => match prefix.kind() {
489 Verbatim(root) => {
490 path_str.push_str("file:///");
491 path_str.push_str(
492 &root
493 .to_str()
494 .ok_or(ParseRIError::Unsupported)?
495 .replace('/', "%2F"),
496 );
497 verbatim = true;
498 }
499 VerbatimUNC(server, root) => {
500 path_str.push_str("file://");
501 path_str.push_str(
502 &server
503 .to_str()
504 .ok_or(ParseRIError::Unsupported)?
505 .replace('/', "%2F"),
506 );
507 path_str.push('/');
508 path_str.push_str(
509 &root
510 .to_str()
511 .ok_or(ParseRIError::Unsupported)?
512 .replace('/', "%2F"),
513 );
514 verbatim = true;
515 }
516 VerbatimDisk(letter) => {
517 path_str.push_str("file:");
518 path_str.push(letter as char);
519 path_str.push(':');
520 verbatim = true;
521 }
522 DeviceNS(device) => {
523 path_str.push_str("file:///");
524 path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
525 }
526 UNC(server, root) => {
527 path_str.push_str("file://");
528 path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
529 path_str.push('/');
530 path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
531 }
532 Disk(letter) => {
533 path_str.push_str("file:");
534 path_str.push(letter as char);
535 path_str.push(':');
536 }
537 },
538 RootDir => {}
539 CurDir => {
540 if !path_str.is_empty() {
541 path_str.push_str("/.");
542 } else {
543 path_str.push_str(".");
544 }
545 }
546 ParentDir => {
547 if !path_str.is_empty() {
548 path_str.push_str("/..");
549 } else {
550 path_str.push_str("..")
551 }
552 }
553 Normal(segment) => {
554 if !path_str.is_empty() {
555 path_str.push('/');
556 }
557 let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
558 if verbatim {
559 path_str.push_str(&segment.replace('/', "%2F"));
560 } else {
561 path_str.push_str(segment);
562 }
563 }
564 }
565 }
566 if (path.is_dir()
567 || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
568 || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
569 && !path_str.ends_with('/')
570 {
571 path_str.push('/');
572 }
573 URIString::parse(path_str)
574 }
575 #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
576 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
577 todo!()
578 }
579 _parse_file_path(path.as_ref())
580 }
581
    /// Converts this `URIString` into a `Box<URIStr>` (the text is copied into the box).
    pub fn into_boxed_uri_str(self) -> Box<URIStr> {
        Box::from(self.as_ref())
    }
586
587 pub fn normalize(&mut self) {
592 use Component::*;
593
594 let mut uri = String::with_capacity(self.uri.len());
595 let mut paths = vec![];
596 let mut query = None;
597 let mut fragment = None;
598 let mut has_root = false;
599 for comp in self.components() {
600 match comp {
601 Scheme(scheme) => {
602 uri.push_str(&scheme.to_ascii_lowercase());
603 uri.push(':');
604 }
605 Authority {
606 userinfo,
607 host,
608 port,
609 } => {
610 uri.push_str("//");
611 if let Some(userinfo) = userinfo {
612 uri.push_str(userinfo);
613 uri.push('@');
614 }
615 uri.push_str(host);
616 if let Some(port) = port {
617 uri.push(':');
618 uri.push_str(port);
619 }
620 }
621 RootSegment => has_root = true,
622 Segment(segment) => paths.push(segment),
623 Query(q) => query = Some(q),
624 Fragment(f) => fragment = Some(f),
625 }
626 }
627 build_normalized_path(paths.into_iter(), has_root, &mut uri);
628 if let Some(query) = query {
629 uri.push('?');
630 uri.push_str(query);
631 }
632 if let Some(fragment) = fragment {
633 uri.push('#');
634 uri.push_str(fragment);
635 }
636 self.uri = uri;
637 }
638}
639
/// Borrows the owned URI as a [`URIStr`] without copying.
impl AsRef<URIStr> for URIString {
    fn as_ref(&self) -> &URIStr {
        URIStr::new(&self.uri)
    }
}
645
/// Lets `URIString` be borrowed as `URIStr`; both types derive `Hash` and `Eq`
/// from the same underlying text.
impl Borrow<URIStr> for URIString {
    fn borrow(&self) -> &URIStr {
        self.as_ref()
    }
}
651
/// Makes all `URIStr` methods available on `URIString`.
impl Deref for URIString {
    type Target = URIStr;

    fn deref(&self) -> &Self::Target {
        self.as_ref()
    }
}
659
/// Displays the URI by delegating to the `Display` implementation of [`URIStr`].
impl std::fmt::Display for URIString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_ref())
    }
}
665
// Generates `From<URIString>` for each listed type by borrowing the value as
// `&URIStr` and delegating to that type's `From<&URIStr>` implementation.
macro_rules! impl_convertion_uri_string {
    ($( $t:ty ),*) => {
        $(
            impl From<URIString> for $t {
                fn from(value: URIString) -> $t {
                    From::from(value.as_ref())
                }
            }
        )*
    };
}
impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
678
679fn build_normalized_path<'a>(
680 segments: impl Iterator<Item = &'a str>,
681 has_root: bool,
682 buffer: &mut String,
683) {
684 let segments = normalize_path_segments(segments, has_root);
685 if has_root {
686 buffer.push('/');
687 }
688 for (i, seg) in segments.into_iter().enumerate() {
689 if i > 0 {
690 buffer.push('/');
691 }
692 buffer.push_str(seg);
693 }
694}
695
/// Resolves `.` and `..` segments and returns the remaining segments.
///
/// * `.` is dropped.
/// * `..` removes the previous segment when there is one that is not itself
///   `..`; otherwise it is kept for rootless paths and dropped for rooted ones.
/// * When the last input segment was `.` or `..`, an empty segment is appended
///   so that the joined path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut kept: Vec<&'a str> = Vec::new();
    let mut ended_on_dot_segment = false;

    for segment in segments {
        ended_on_dot_segment = segment == "." || segment == "..";
        match segment {
            "." => {}
            ".." => match kept.last() {
                Some(&previous) if previous != ".." => {
                    kept.pop();
                }
                // Nothing left to climb out of: a rooted path cannot go above "/".
                _ if has_root => {}
                _ => kept.push(segment),
            },
            _ => kept.push(segment),
        }
    }

    if ended_on_dot_segment {
        kept.push("");
    }
    kept
}
725
726fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
733 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
734 parse_relative_ref(b)
737 } else {
738 if !b[0].is_ascii_alphabetic() {
742 parse_relative_ref(b)
745 } else {
746 if let Some(&c) = b
750 .iter()
751 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
752 && c == b':'
753 {
754 parse_uri(b)
755 } else {
756 parse_relative_ref(b)
757 }
758 }
759 }
760}
761
762fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
769 parse_scheme(b)?;
770 *b = b
771 .strip_prefix(b":")
772 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
773 parse_hier_part(b)?;
774 if let Some(query) = b.strip_prefix(b"?") {
775 *b = query;
776 parse_query(b)?;
777 }
778 if let Some(fragment) = b.strip_prefix(b"#") {
779 *b = fragment;
780 parse_fragment(b)?;
781 }
782 Ok(())
783}
784
785fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
792 if b.is_empty() || !b[0].is_ascii_alphabetic() {
793 return Err(ParseRIError::InvalidScheme);
794 }
795 let pos = b
796 .iter()
797 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
798 .unwrap_or(b.len());
799 *b = &b[pos..];
800 Ok(())
801}
802
803fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
813 if let Some(rem) = b.strip_prefix(b"/") {
814 if let Some(rem) = rem.strip_prefix(b"/") {
817 *b = rem;
821 parse_authority(b)?;
822 parse_path_abempty(b)
823 } else {
824 parse_path_absolute(b)
827 }
828 } else {
829 let mut dum = *b;
831 if parse_pchar(&mut dum).is_ok() {
832 parse_path_rootless(b)
834 } else {
835 Ok(())
839 }
840 }
841}
842
843fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
850 if b.starts_with(b"[") {
851 parse_ip_literal(b)?;
853 if let Some(rem) = b.strip_prefix(b":") {
854 *b = rem;
855 parse_port(b)?;
856 }
857 return Ok(());
858 }
859
860 let mut colon = usize::MAX;
886 let mut now = 0;
887 let mut t = *b;
888 while !t.is_empty() {
889 let pos = t
890 .iter()
891 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
892 .unwrap_or(t.len());
893 t = &t[pos..];
894 now += pos;
895 if let Some(rem) = t.strip_prefix(b":") {
896 now += 1;
897 t = rem;
898 colon = colon.min(now);
899 } else {
900 break;
901 }
902 }
903
904 debug_assert_eq!(now, b.len() - t.len());
905
906 if let Some(rem) = t.strip_prefix(b"@") {
907 *b = rem;
908 parse_host(b)?;
909 if let Some(rem) = b.strip_prefix(b":") {
910 *b = rem;
911 parse_port(b)?;
912 }
913 Ok(())
914 } else if t.starts_with(b"[") {
915 Err(ParseRIError::InvalidAuthority)
916 } else if colon < usize::MAX {
917 *b = &b[colon + 1..];
918 parse_port(b)
919 } else {
920 *b = t;
921 Ok(())
922 }
923}
924
925fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
943 if b.starts_with(b"[") {
944 parse_ip_literal(b)
945 } else {
946 parse_reg_name(b)
948 }
949}
950
951fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
958 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
959 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
960 parse_ipv_future(b)?;
961 } else {
962 parse_ipv6_address(b)?;
963 }
964 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
965 Ok(())
966}
967
968fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
975 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
976 return Err(ParseRIError::InvalidIPvFuture);
977 }
978 *b = &b[1..];
979 let pos = b
980 .iter()
981 .position(|&b| !b.is_ascii_hexdigit())
982 .unwrap_or(b.len());
983 if !(1..=b.len() - 2).contains(&pos) {
984 return Err(ParseRIError::InvalidIPvFuture);
985 }
986 *b = &b[pos..];
987 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
988 let pos = b
989 .iter()
990 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
991 .unwrap_or(b.len());
992 if pos == 0 {
993 return Err(ParseRIError::InvalidIPvFuture);
994 }
995 *b = &b[pos..];
996 Ok(())
997}
998
999fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
1018 let mut cnt = 1;
1019 let mut omit = false;
1020 if let Some(rem) = b.strip_prefix(b":") {
1021 *b = rem;
1022 omit = true;
1023 } else {
1024 parse_h16(b)?;
1025 }
1026
1027 while cnt + (omit as i32) < 8
1028 && let Some(rem) = b.strip_prefix(b":")
1029 {
1030 *b = rem;
1031 if b.starts_with(b":") {
1032 if omit {
1033 return Err(ParseRIError::InvalidIPv6address);
1034 }
1035 omit = true;
1036 cnt += 1;
1037 continue;
1038 }
1039
1040 let mut dum = *b;
1048 if parse_ipv4_address(&mut dum).is_ok() {
1049 *b = dum;
1050 cnt += 2;
1052 break;
1054 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
1055 parse_h16(b)?;
1056 }
1057 }
1058
1059 if (omit && cnt <= 8) || (!omit && cnt == 8) {
1062 Ok(())
1063 } else {
1064 Err(ParseRIError::InvalidIPv6address)
1065 }
1066}
1067
1068fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
1076 let pos = b
1077 .iter()
1078 .position(|&b| !b.is_ascii_hexdigit())
1079 .unwrap_or(b.len());
1080 if pos == 0 {
1081 Err(ParseRIError::InvalidH16)
1082 } else {
1083 *b = &b[pos.min(4)..];
1084 Ok(())
1085 }
1086}
1087
1088fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
1100 parse_dec_octet(b)?;
1101 for _ in 0..3 {
1102 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
1103 parse_dec_octet(b)?;
1104 }
1105 Ok(())
1106}
1107fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
1108 let len = match b {
1109 [b'2', b'5', b'0'..=b'5', ..] => 3,
1110 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
1111 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
1112 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
1113 [b'0'..=b'9', ..] => 1,
1114 _ => return Err(ParseRIError::InvalidDecOctet),
1115 };
1116 *b = &b[len..];
1117 Ok(())
1118}
1119
1120fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
1127 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
1130 Ok(())
1131}
1132
1133fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
1140 let pos = b
1141 .iter()
1142 .position(|&b| !b.is_ascii_digit())
1143 .unwrap_or(b.len());
1144 *b = &b[pos..];
1145 Ok(())
1146}
1147
1148fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1155 while let Some(rem) = b.strip_prefix(b"/") {
1156 *b = rem;
1157 parse_segment(b)?;
1158 }
1159 Ok(())
1160}
1161
1162fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
1169 *b = b
1170 .strip_prefix(b"/")
1171 .ok_or(ParseRIError::InvalidPathAbsolute)?;
1172 if parse_segment_nz(b).is_ok() {
1173 while let Some(rem) = b.strip_prefix(b"/") {
1174 *b = rem;
1175 parse_segment(b)?;
1176 }
1177 }
1178 Ok(())
1179}
1180
1181fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1188 parse_segment_nz_nc(b)?;
1189 while let Some(rem) = b.strip_prefix(b"/") {
1190 *b = rem;
1191 parse_segment(b)?;
1192 }
1193 Ok(())
1194}
1195
1196fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1203 parse_segment_nz(b)?;
1204 while let Some(rem) = b.strip_prefix(b"/") {
1205 *b = rem;
1206 parse_segment(b)?;
1207 }
1208 Ok(())
1209}
1210
1211fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1229 while parse_pchar(b).is_ok() {}
1230 Ok(())
1231}
1232
1233fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1240 parse_pchar(b)?;
1241 while parse_pchar(b).is_ok() {}
1242 Ok(())
1243}
1244
1245fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1253 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1254 return Err(ParseRIError::InvalidSegmentNzNc);
1255 }
1256 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1257 Ok(())
1258}
1259
1260fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1267 if b.is_empty() {
1268 return Err(ParseRIError::InvalidPChar);
1269 }
1270
1271 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1272 *b = &b[1..];
1273 Ok(())
1274 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1275 *b = &b[3..];
1276 Ok(())
1277 } else {
1278 Err(ParseRIError::InvalidPChar)
1279 }
1280}
1281
1282fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1289 loop {
1290 if let Some(rem) = b.strip_prefix(b"/") {
1291 *b = rem;
1292 } else if let Some(rem) = b.strip_prefix(b"?") {
1293 *b = rem;
1294 } else if parse_pchar(b).is_ok() {
1295 } else {
1297 break Ok(());
1298 }
1299 }
1300}
1301
1302fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1309 loop {
1310 if let Some(rem) = b.strip_prefix(b"/") {
1311 *b = rem;
1312 } else if let Some(rem) = b.strip_prefix(b"?") {
1313 *b = rem;
1314 } else if parse_pchar(b).is_ok() {
1315 } else {
1317 break Ok(());
1318 }
1319 }
1320}
1321
1322fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1329 parse_relative_part(b)?;
1330 if let Some(query) = b.strip_prefix(b"?") {
1331 *b = query;
1332 parse_query(b)?;
1333 }
1334 if let Some(fragment) = b.strip_prefix(b"#") {
1335 *b = fragment;
1336 parse_fragment(b)?;
1337 }
1338 Ok(())
1339}
1340
1341fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1351 if let Some(rem) = b.strip_prefix(b"/") {
1352 if let Some(rem) = rem.strip_prefix(b"/") {
1353 *b = rem;
1354 parse_authority(b)?;
1355 parse_path_abempty(b)
1356 } else {
1357 parse_path_absolute(b)
1358 }
1359 } else {
1360 let orig = b.len();
1361 let ret = parse_path_noscheme(b);
1362 if orig == b.len() { Ok(()) } else { ret }
1364 }
1365}
1366
/// Returns `true` for `reserved = gen-delims / sub-delims`.
fn is_reserved(b: u8) -> bool {
    is_gen_delims(b) || is_sub_delims(b)
}
1376
/// Returns `true` for `gen-delims`: `:`, `/`, `?`, `#`, `[`, `]`, `@`.
fn is_gen_delims(b: u8) -> bool {
    b":/?#[]@".contains(&b)
}
1386
/// Returns `true` for `sub-delims`: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`,
/// `,`, `;`, `=`.
fn is_sub_delims(b: u8) -> bool {
    b"!$&'()*+,;=".contains(&b)
}
1399
/// Returns `true` for `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`.
fn is_unreserved(b: u8) -> bool {
    match b {
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' => true,
        b'-' | b'.' | b'_' | b'~' => true,
        _ => false,
    }
}
1409
/// Percent-escape lookup table: the three bytes starting at index `3 * b` are
/// `%` followed by the two uppercase hex digits of byte `b`.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Every third byte stays '%'; the two bytes after it are filled in below.
    let mut table = [b'%'; 256 * 3];
    let mut byte = 0usize;
    while byte < 256 {
        table[3 * byte + 1] = HEX_DIGITS[byte >> 4];
        table[3 * byte + 2] = HEX_DIGITS[byte & 0xF];
        byte += 1;
    }
    table
};
/// `LUT_BYTES` viewed as a string, so escapes can be appended with `push_str`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` contains only `%` and ASCII hex digits, which is valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1432
/// Percent-escapes every byte of `s` (no character is exempt).
pub fn escape(s: &str) -> Cow<'_, str> {
    escape_except(s, |_| false)
}
1437
/// Percent-escapes every byte of `b` (no byte is exempt).
pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
    escape_bytes_except(b, |_| false)
}
1442
1443pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1446 let cap = s
1447 .chars()
1448 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1449 .sum::<usize>();
1450 if cap == 0 {
1451 return Cow::Borrowed(s);
1452 }
1453 let mut encode = [0; 6];
1454 let mut buf = String::with_capacity(s.len() + cap);
1455 for c in s.chars() {
1456 if is_except(c) {
1457 buf.push(c);
1458 } else {
1459 let encoded = c.encode_utf8(&mut encode);
1460 for b in encoded.bytes() {
1461 let index = b as usize * 3;
1462 buf.push_str(&LUT[index..index + 3]);
1463 }
1464 }
1465 }
1466 Cow::Owned(buf)
1467}
1468
1469pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1472 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1473 if cap == 0 {
1474 return Cow::Borrowed(b);
1475 }
1476 let mut buf = Vec::with_capacity(b.len() + cap);
1477 for &b in b {
1478 if is_except(b) {
1479 buf.push(b);
1480 } else {
1481 let index = b as usize * 3;
1482 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1483 }
1484 }
1485 Cow::Owned(buf)
1486}
1487
/// Error returned by [`unescape`] and [`unescape_bytes`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URIUnescapeError {
    /// A `%` was not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}
1493
/// Wraps a UTF-8 decoding error so that `?` can be used on `from_utf8` results.
impl From<std::str::Utf8Error> for URIUnescapeError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8Error(value)
    }
}
1499
1500pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
1506 if !s.contains('%') {
1507 return Ok(Cow::Borrowed(s));
1508 }
1509
1510 let mut split = s.split('%');
1511 let mut buf = String::with_capacity(s.len());
1512 buf.push_str(split.next().unwrap());
1513 let mut bytes = vec![];
1514 for chunk in split {
1515 if chunk.len() < 2 {
1516 return Err(URIUnescapeError::InvalidEscape);
1517 }
1518 let byte =
1519 u8::from_str_radix(&chunk[..2], 16).map_err(|_| URIUnescapeError::InvalidEscape)?;
1520 bytes.push(byte);
1521
1522 if chunk.len() > 2 {
1523 buf.push_str(from_utf8(&bytes)?);
1524 buf.push_str(&chunk[2..]);
1525 bytes.clear();
1526 }
1527 }
1528
1529 if !bytes.is_empty() {
1530 buf.push_str(from_utf8(&bytes)?);
1531 }
1532 Ok(Cow::Owned(buf))
1533}
1534
1535pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
1541 if !b.contains(&b'%') {
1542 return Ok(Cow::Borrowed(b));
1543 }
1544
1545 let mut split = b.split(|&b| b == b'%');
1546 let mut buf = Vec::with_capacity(b.len());
1547 buf.extend_from_slice(split.next().unwrap());
1548
1549 fn hexdigit_to_byte(hex: u8) -> u8 {
1550 if hex.is_ascii_digit() {
1551 hex - b'0'
1552 } else if hex.is_ascii_uppercase() {
1553 hex - b'A' + 10
1554 } else {
1555 hex - b'a' + 10
1556 }
1557 }
1558 for chunk in split {
1559 if chunk.len() < 2 || !chunk[0].is_ascii_hexdigit() || !chunk[1].is_ascii_hexdigit() {
1560 return Err(URIUnescapeError::InvalidEscape);
1561 }
1562 let hi = hexdigit_to_byte(chunk[0]);
1563 let lo = hexdigit_to_byte(chunk[1]);
1564 buf.push((hi << 4) | lo);
1565 }
1566 Ok(Cow::Owned(buf))
1567}
1568
/// Position of the [`Components`] iterator: the component it will try to read next.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    /// Nothing read yet; a scheme may follow.
    Scheme,
    /// An authority (`//...`) may follow.
    Authority,
    /// The leading `/` of the path may follow.
    Root,
    /// Path segments are being read.
    Path,
    /// A query (`?...`) may follow.
    Query,
    /// A fragment (`#...`) may follow.
    Fragment,
    /// Iteration is complete.
    Finish,
}
1579
/// Iterator over the components of a URI, created by `URIStr::components`.
pub struct Components<'a> {
    /// Which component is read next.
    state: DecomposeState,
    /// The part of the URI that has not been consumed yet.
    uri: &'a str,
}
1585
impl Components<'_> {
    /// Creates an iterator positioned at the start of `uri`.
    fn new(uri: &str) -> Components<'_> {
        Components {
            state: DecomposeState::Scheme,
            uri,
        }
    }
}
1594
1595impl<'a> Iterator for Components<'a> {
1596 type Item = Component<'a>;
1597
1598 fn next(&mut self) -> Option<Self::Item> {
1599 use DecomposeState::*;
1600 loop {
1601 match self.state {
1602 Scheme => {
1603 self.state = Authority;
1604 let mut bytes = self.uri.as_bytes();
1605 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1606 let len = self.uri.len() - bytes.len();
1607 let (scheme, rem) = self.uri.split_at(len);
1608 self.uri = &rem[1..];
1609 break Some(Component::Scheme(scheme));
1610 }
1611 }
1612 Authority => {
1613 self.state = Root;
1614 if let Some(rem) = self.uri.strip_prefix("//") {
1615 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1616 let (mut authority, rem) = rem.split_at(pos);
1617 self.uri = rem;
1618 let mut userinfo = None;
1619 if let Some((ui, rem)) = authority.split_once('@') {
1620 userinfo = Some(ui);
1621 authority = rem;
1622 }
1623 let mut port = None;
1624 if let Some((host, p)) = authority.rsplit_once(':')
1625 && p.bytes().all(|b| b.is_ascii_digit())
1626 {
1627 port = Some(p);
1628 authority = host;
1629 }
1630 break Some(Component::Authority {
1631 userinfo,
1632 host: authority,
1633 port,
1634 });
1635 }
1636 }
1637 Root => {
1638 self.state = Path;
1639 if let Some(rem) = self.uri.strip_prefix('/') {
1640 self.uri = rem;
1641 break Some(Component::RootSegment);
1642 }
1643 }
1644 Path => {
1645 let pos = self
1646 .uri
1647 .bytes()
1648 .position(|b| b == b'/' || b == b'?' || b == b'#')
1649 .unwrap_or(self.uri.len());
1650 let (segment, rem) = self.uri.split_at(pos);
1651 if let Some(rem) = rem.strip_prefix('/') {
1652 self.uri = rem;
1653 } else {
1654 self.uri = rem;
1655 self.state = Query;
1656 }
1657 break Some(Component::Segment(segment));
1658 }
1659 Query => {
1660 self.state = Fragment;
1661 if let Some(rem) = self.uri.strip_prefix('?') {
1662 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1663 let (query, rem) = rem.split_at(pos);
1664 self.uri = rem;
1665 break Some(Component::Query(query));
1666 }
1667 }
1668 Fragment => {
1669 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1670 self.state = Finish;
1671 if !self.uri.is_empty() {
1672 let (_, frag) = self.uri.split_at(1);
1673 self.uri = "";
1674 break Some(Component::Fragment(frag));
1675 }
1676 }
1677 Finish => break None,
1678 }
1679 }
1680 }
1681}
1682
/// A single component of a URI, as produced by [`Components`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, split into its parts (without `//`, `@` and `:`).
    Authority {
        /// The text before `@`, if present.
        userinfo: Option<&'a str>,
        /// The host.
        host: &'a str,
        /// The all-digit text after the last `:`, if present.
        port: Option<&'a str>,
    },
    /// The leading `/` of the path.
    RootSegment,
    /// One path segment, without separators (may be empty).
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}