1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// A borrowed, unsized URI reference.
///
/// The type is a `#[repr(transparent)]` wrapper around `str`; the owned
/// counterpart produced by `to_owned` is `URIString`.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// The URI text exactly as stored (returned by `as_escaped_str`).
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
38 use Component::*;
39
40 let base = if self.is_absolute() {
41 Cow::Borrowed(self)
42 } else {
43 let mut base = self.to_owned();
44 base.normalize();
45 if let Some(frag) = base.uri.bytes().position(|b| b == b'#') {
46 base.uri.truncate(frag);
47 }
48 assert!(
49 base.is_absolute(),
50 "'{}' is not absolute",
51 base.as_escaped_str()
52 );
53 Cow::Owned(base)
54 };
55
56 let mut ref_components = reference.components().peekable();
57 if ref_components
58 .next_if(|comp| matches!(comp, Scheme(_)))
59 .is_some()
60 {
61 let mut ret = reference.to_owned();
62 ret.normalize();
63 return ret;
64 }
65
66 if ref_components
67 .next_if(|comp| matches!(comp, Authority { .. }))
68 .is_some()
69 {
70 let mut ret = URIString {
72 uri: [base.scheme().unwrap(), ":", &reference.uri].concat(),
73 };
74 ret.normalize();
75 return ret;
76 }
77
78 let mut components = base.components().peekable();
79 let mut uri = String::new();
80 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
81 uri.push_str(scheme);
82 uri.push(':');
83 }
84 if let Some(Authority {
85 userinfo,
86 host,
87 port,
88 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
89 {
90 uri.push_str("//");
91 if let Some(userinfo) = userinfo {
92 uri.push_str(userinfo);
93 uri.push(':');
94 }
95 uri.push_str(host);
96 if let Some(port) = port {
97 uri.push(':');
98 uri.push_str(port);
99 }
100 }
101
102 if ref_components
103 .next_if(|comp| matches!(comp, RootSegment))
104 .is_some()
105 {
106 uri.push_str(&reference.uri);
107 let mut ret = URIString { uri };
108 ret.normalize();
109 return ret;
110 }
111
112 let mut segments = vec![];
113 let has_root = components
114 .next_if(|comp| matches!(comp, RootSegment))
115 .is_some();
116 let mut has_dot_segment = false;
117 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
118 segments.push(segment);
119 has_dot_segment |= segment == "." || segment == "..";
120 }
121 if has_dot_segment {
122 segments = normalize_path_segments(segments.into_iter(), has_root);
123 }
124
125 let mut has_path = false;
126 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
127 let mut buf = vec![segment];
128 while let Some(Segment(segment)) =
129 ref_components.next_if(|comp| matches!(comp, Segment(_)))
130 {
131 buf.push(segment);
132 }
133 if buf.len() > 1 || !buf[0].is_empty() {
134 segments.pop();
135 segments.extend(buf);
136 has_path = true;
137 }
138 }
139 build_normalized_path(segments.into_iter(), has_root, &mut uri);
140
141 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
142 uri.push('?');
143 uri.push_str(query);
144 } else if !has_path
145 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
146 {
147 uri.push('?');
148 uri.push_str(query);
149 }
150
151 if let Some(Fragment(fragment)) = ref_components.next() {
152 uri.push('#');
153 uri.push_str(fragment);
154 }
155
156 URIString { uri }
157 }
158
    /// Returns the URI text exactly as stored, with any percent-escapes left
    /// in place.
    pub fn as_escaped_str(&self) -> &str {
        &self.uri
    }
163
164 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
167 unescape(&self.uri).ok()
168 }
169
170 pub fn is_absolute(&self) -> bool {
173 self.scheme().is_some() && self.fragment().is_none()
174 }
175
    /// Returns `true` when the URI has no scheme, i.e. it is a relative
    /// reference.
    pub fn is_relative(&self) -> bool {
        self.scheme().is_none()
    }
181
182 pub fn scheme(&self) -> Option<&str> {
185 let pos = self.uri.bytes().position(is_reserved)?;
186 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
187 }
188
189 pub fn authority(&self) -> Option<&str> {
192 let rem = self
193 .uri
194 .strip_prefix("//")
195 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
196 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
197 }
198
199 pub fn userinfo(&self) -> Option<&str> {
202 Some(self.authority()?.split_once('@')?.0)
203 }
204
205 pub fn host(&self) -> Option<&str> {
208 let mut auth = self.authority()?;
209 if let Some((_userinfo, rem)) = auth.split_once('@') {
210 auth = rem;
211 }
212 if let Some((host, port)) = auth.rsplit_once(':')
213 && port.bytes().all(|b| b.is_ascii_digit())
214 {
215 auth = host;
216 }
217 Some(auth)
218 }
219
220 pub fn port(&self) -> Option<&str> {
223 let (_, port) = self.authority()?.rsplit_once(':')?;
224 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
225 }
226
227 pub fn path(&self) -> &str {
230 let mut path = &self.uri;
231 if let Some(scheme) = self.scheme() {
232 path = &path[scheme.len() + 1..];
234 }
235 if let Some(rem) = path.strip_prefix("//") {
236 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
238 path = &rem[pos..]
239 }
240
241 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
242 }
243
244 pub fn query(&self) -> Option<&str> {
247 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
248 if self.uri.as_bytes()[pos] == b'#' {
249 return None;
250 }
251 let query = &self.uri[pos + 1..];
252 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
253 Some(&query[..pos])
254 }
255
256 pub fn fragment(&self) -> Option<&str> {
259 let pos = self.uri.bytes().position(|b| b == b'#')?;
260 Some(&self.uri[pos + 1..])
261 }
262
    /// Returns an iterator that decomposes the URI into its `Component`s.
    pub fn components(&self) -> Components<'_> {
        Components::new(&self.uri)
    }
267}
268
269impl ToOwned for URIStr {
270 type Owned = URIString;
271
272 fn to_owned(&self) -> Self::Owned {
273 URIString {
274 uri: self.uri.to_owned(),
275 }
276 }
277}
278
279impl From<&URIStr> for URIString {
280 fn from(value: &URIStr) -> Self {
281 value.to_owned()
282 }
283}
284
/// Identity conversion, so `URIStr` itself satisfies `AsRef<URIStr>` bounds.
impl AsRef<URIStr> for URIStr {
    fn as_ref(&self) -> &URIStr {
        self
    }
}
290
291impl Clone for Box<URIStr> {
292 fn clone(&self) -> Self {
293 self.as_ref().into()
294 }
295}
296
297impl std::fmt::Display for URIStr {
298 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
299 write!(
300 f,
301 "{}",
302 self.as_unescaped_str()
303 .as_deref()
304 .unwrap_or(self.as_escaped_str())
305 )
306 }
307}
308
309macro_rules! impl_boxed_convertion_uri_str {
310 ($( $t:ident ),*) => {
311 $(
312 impl From<&URIStr> for $t<URIStr> {
313 fn from(value: &URIStr) -> Self {
314 let boxed: $t<str> = value.uri.into();
315 unsafe {
316 std::mem::transmute(boxed)
320 }
321 }
322 }
323 )*
324 };
325}
326impl_boxed_convertion_uri_str!(Box, Rc, Arc);
327
/// An owned URI reference; the owned counterpart of `URIStr`, to which it
/// dereferences.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// The URI text in escaped form.
    uri: String,
}
343
344impl URIString {
345 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
358 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
359 let uri = escape_except(uri, |b| {
360 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
361 });
362 URIString::parse_escaped(&uri)
363 }
364 _parse(uri.as_ref())
365 }
366
367 pub fn parse_system_id(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
393 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
394 let uri = escape_except(uri, |b| {
395 b.is_ascii()
397 && !matches!(
398 b as u8,
399 0..=0x1F
400 | 0x20
401 | 0x22
402 | 0x3C
403 | 0x3E
404 | 0x5C
405 | 0x5E
406 | 0x60
407 | 0x7B..=0x7D
408 | 0x7F..
409 )
410 });
411 URIString::parse_escaped(&uri)
412 }
413 _parse(uri.as_ref())
414 }
415
416 fn parse_escaped(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
424 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
425 let mut bytes = uri.as_bytes();
426 parse_uri_reference(&mut bytes)?;
427 if !bytes.is_empty() {
428 Err(ParseRIError::NotTermination)
429 } else {
430 Ok(URIString {
431 uri: uri.to_owned(),
432 })
433 }
434 }
435 _parse(uri.as_ref())
436 }
437
438 pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
444 #[cfg(target_family = "unix")]
445 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
446 let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
447 if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
448 && !path_str.ends_with('/')
449 {
450 path_str.push('/');
451 }
452 if path.is_absolute() {
453 path_str.insert_str(0, "file://");
454 }
455 URIString::parse(path_str)
456 }
457 #[cfg(target_family = "windows")]
458 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
459 use std::path::{Component::*, Prefix::*};
460
461 let mut path_str = String::new();
462 let mut verbatim = false;
463 for comp in path.components() {
464 match comp {
465 Prefix(prefix) => match prefix.kind() {
466 Verbatim(root) => {
467 path_str.push_str("file:///");
468 path_str.push_str(
469 &root
470 .to_str()
471 .ok_or(ParseRIError::Unsupported)?
472 .replace('/', "%2F"),
473 );
474 verbatim = true;
475 }
476 VerbatimUNC(server, root) => {
477 path_str.push_str("file://");
478 path_str.push_str(
479 &server
480 .to_str()
481 .ok_or(ParseRIError::Unsupported)?
482 .replace('/', "%2F"),
483 );
484 path_str.push('/');
485 path_str.push_str(
486 &root
487 .to_str()
488 .ok_or(ParseRIError::Unsupported)?
489 .replace('/', "%2F"),
490 );
491 verbatim = true;
492 }
493 VerbatimDisk(letter) => {
494 path_str.push_str("file:");
495 path_str.push(letter as char);
496 path_str.push(':');
497 verbatim = true;
498 }
499 DeviceNS(device) => {
500 path_str.push_str("file:///");
501 path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
502 }
503 UNC(server, root) => {
504 path_str.push_str("file://");
505 path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
506 path_str.push('/');
507 path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
508 }
509 Disk(letter) => {
510 path_str.push_str("file:");
511 path_str.push(letter as char);
512 path_str.push(':');
513 }
514 },
515 RootDir => {}
516 CurDir => path_str.push_str("/."),
517 ParentDir => path_str.push_str("/.."),
518 Normal(segment) => {
519 path_str.push('/');
520 let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
521 if verbatim {
522 path_str.push_str(&segment.replace('/', "%2F"));
523 } else {
524 path_str.push_str(segment);
525 }
526 }
527 }
528 }
529 if (path.is_dir()
530 || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
531 || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
532 && !path_str.ends_with('/')
533 {
534 path_str.push('/');
535 }
536 URIString::parse(path_str)
537 }
538 #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
539 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
540 todo!()
541 }
542 _parse_file_path(path.as_ref())
543 }
544
545 pub fn into_boxed_uri_str(self) -> Box<URIStr> {
546 Box::from(self.as_ref())
547 }
548
549 pub fn normalize(&mut self) {
552 use Component::*;
553
554 let mut uri = String::with_capacity(self.uri.len());
555 let mut paths = vec![];
556 let mut query = None;
557 let mut fragment = None;
558 let mut has_root = false;
559 for comp in self.components() {
560 match comp {
561 Scheme(scheme) => {
562 uri.push_str(&scheme.to_ascii_lowercase());
563 uri.push(':');
564 }
565 Authority {
566 userinfo,
567 host,
568 port,
569 } => {
570 uri.push_str("//");
571 if let Some(userinfo) = userinfo {
572 uri.push_str(userinfo);
573 uri.push('@');
574 }
575 uri.push_str(host);
576 if let Some(port) = port {
577 uri.push(':');
578 uri.push_str(port);
579 }
580 }
581 RootSegment => has_root = true,
582 Segment(segment) => paths.push(segment),
583 Query(q) => query = Some(q),
584 Fragment(f) => fragment = Some(f),
585 }
586 }
587 build_normalized_path(paths.into_iter(), has_root, &mut uri);
588 if let Some(query) = query {
589 uri.push('?');
590 uri.push_str(query);
591 }
592 if let Some(fragment) = fragment {
593 uri.push('#');
594 uri.push_str(fragment);
595 }
596 self.uri = uri;
597 }
598}
599
/// Borrows the owned URI as a `URIStr` view over the same text.
impl AsRef<URIStr> for URIString {
    fn as_ref(&self) -> &URIStr {
        URIStr::new(&self.uri)
    }
}
605
/// Delegates to `AsRef<URIStr>`; pairs with `ToOwned for URIStr`.
impl Borrow<URIStr> for URIString {
    fn borrow(&self) -> &URIStr {
        self.as_ref()
    }
}
611
/// Lets every `URIStr` method be called directly on a `URIString`.
impl Deref for URIString {
    type Target = URIStr;

    fn deref(&self) -> &Self::Target {
        self.as_ref()
    }
}
619
620impl std::fmt::Display for URIString {
621 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
622 write!(f, "{}", self.as_ref())
623 }
624}
625
626macro_rules! impl_convertion_uri_string {
627 ($( $t:ty ),*) => {
628 $(
629 impl From<URIString> for $t {
630 fn from(value: URIString) -> $t {
631 From::from(value.as_ref())
632 }
633 }
634 )*
635 };
636}
637impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
638
639fn build_normalized_path<'a>(
640 segments: impl Iterator<Item = &'a str>,
641 has_root: bool,
642 buffer: &mut String,
643) {
644 let segments = normalize_path_segments(segments, has_root);
645 if has_root {
646 buffer.push('/');
647 }
648 for (i, seg) in segments.into_iter().enumerate() {
649 if i > 0 {
650 buffer.push('/');
651 }
652 buffer.push_str(seg);
653 }
654}
655
/// Removes `.` and `..` segments from a path given as a segment sequence.
///
/// * `.` is dropped.
/// * `..` removes the previous segment when there is one that is not itself
///   `..`; otherwise it is kept for a non-rooted path and dropped for a rooted
///   one (a rooted path cannot climb above its root).
/// * When the last input segment is `.` or `..`, an empty segment is appended
///   so the rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut resolved: Vec<&'a str> = Vec::new();
    let mut ends_with_dot_segment = false;
    for segment in segments {
        ends_with_dot_segment = matches!(segment, "." | "..");
        match segment {
            "." => {}
            ".." => {
                let can_pop = resolved.last().is_some_and(|prev| *prev != "..");
                if can_pop {
                    resolved.pop();
                } else if !has_root {
                    resolved.push(segment);
                }
            }
            _ => resolved.push(segment),
        }
    }

    if ends_with_dot_segment {
        resolved.push("");
    }

    resolved
}
685
686fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
693 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
694 parse_relative_ref(b)
697 } else {
698 if !b[0].is_ascii_alphabetic() {
702 parse_relative_ref(b)
705 } else {
706 if let Some(&c) = b
710 .iter()
711 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
712 && c == b':'
713 {
714 parse_uri(b)
715 } else {
716 parse_relative_ref(b)
717 }
718 }
719 }
720}
721
722fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
729 parse_scheme(b)?;
730 *b = b
731 .strip_prefix(b":")
732 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
733 parse_hier_part(b)?;
734 if let Some(query) = b.strip_prefix(b"?") {
735 *b = query;
736 parse_query(b)?;
737 }
738 if let Some(fragment) = b.strip_prefix(b"#") {
739 *b = fragment;
740 parse_fragment(b)?;
741 }
742 Ok(())
743}
744
745fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
752 if b.is_empty() || !b[0].is_ascii_alphabetic() {
753 return Err(ParseRIError::InvalidScheme);
754 }
755 let pos = b
756 .iter()
757 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
758 .unwrap_or(b.len());
759 *b = &b[pos..];
760 Ok(())
761}
762
763fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
773 if let Some(rem) = b.strip_prefix(b"/") {
774 if let Some(rem) = rem.strip_prefix(b"/") {
777 *b = rem;
781 parse_authority(b)?;
782 parse_path_abempty(b)
783 } else {
784 parse_path_absolute(b)
787 }
788 } else {
789 let mut dum = *b;
791 if parse_pchar(&mut dum).is_ok() {
792 parse_path_rootless(b)
794 } else {
795 Ok(())
799 }
800 }
801}
802
803fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
810 if b.starts_with(b"[") {
811 parse_ip_literal(b)?;
813 if let Some(rem) = b.strip_prefix(b":") {
814 *b = rem;
815 parse_port(b)?;
816 }
817 return Ok(());
818 }
819
820 let mut colon = usize::MAX;
846 let mut now = 0;
847 let mut t = *b;
848 while !t.is_empty() {
849 let pos = t
850 .iter()
851 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
852 .unwrap_or(t.len());
853 t = &t[pos..];
854 now += pos;
855 if let Some(rem) = t.strip_prefix(b":") {
856 now += 1;
857 t = rem;
858 colon = colon.min(now);
859 } else {
860 break;
861 }
862 }
863
864 debug_assert_eq!(now, b.len() - t.len());
865
866 if let Some(rem) = t.strip_prefix(b"@") {
867 *b = rem;
868 parse_host(b)?;
869 if let Some(rem) = b.strip_prefix(b":") {
870 *b = rem;
871 parse_port(b)?;
872 }
873 Ok(())
874 } else if t.starts_with(b"[") {
875 Err(ParseRIError::InvalidAuthority)
876 } else if colon < usize::MAX {
877 *b = &b[colon + 1..];
878 parse_port(b)
879 } else {
880 *b = t;
881 Ok(())
882 }
883}
884
885fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
903 if b.starts_with(b"[") {
904 parse_ip_literal(b)
905 } else {
906 parse_reg_name(b)
908 }
909}
910
911fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
918 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
919 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
920 parse_ipv_future(b)?;
921 } else {
922 parse_ipv6_address(b)?;
923 }
924 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
925 Ok(())
926}
927
928fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
935 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
936 return Err(ParseRIError::InvalidIPvFuture);
937 }
938 *b = &b[1..];
939 let pos = b
940 .iter()
941 .position(|&b| !b.is_ascii_hexdigit())
942 .unwrap_or(b.len());
943 if !(1..=b.len() - 2).contains(&pos) {
944 return Err(ParseRIError::InvalidIPvFuture);
945 }
946 *b = &b[pos..];
947 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
948 let pos = b
949 .iter()
950 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
951 .unwrap_or(b.len());
952 if pos == 0 {
953 return Err(ParseRIError::InvalidIPvFuture);
954 }
955 *b = &b[pos..];
956 Ok(())
957}
958
959fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
978 let mut cnt = 1;
979 let mut omit = false;
980 if let Some(rem) = b.strip_prefix(b":") {
981 *b = rem;
982 omit = true;
983 } else {
984 parse_h16(b)?;
985 }
986
987 while cnt + (omit as i32) < 8
988 && let Some(rem) = b.strip_prefix(b":")
989 {
990 *b = rem;
991 if b.starts_with(b":") {
992 if omit {
993 return Err(ParseRIError::InvalidIPv6address);
994 }
995 omit = true;
996 cnt += 1;
997 continue;
998 }
999
1000 let mut dum = *b;
1008 if parse_ipv4_address(&mut dum).is_ok() {
1009 *b = dum;
1010 cnt += 2;
1012 break;
1014 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
1015 parse_h16(b)?;
1016 }
1017 }
1018
1019 if (omit && cnt <= 8) || (!omit && cnt == 8) {
1022 Ok(())
1023 } else {
1024 Err(ParseRIError::InvalidIPv6address)
1025 }
1026}
1027
1028fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
1036 let pos = b
1037 .iter()
1038 .position(|&b| !b.is_ascii_hexdigit())
1039 .unwrap_or(b.len());
1040 if pos == 0 {
1041 Err(ParseRIError::InvalidH16)
1042 } else {
1043 *b = &b[pos.min(4)..];
1044 Ok(())
1045 }
1046}
1047
1048fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
1060 parse_dec_octet(b)?;
1061 for _ in 0..3 {
1062 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
1063 parse_dec_octet(b)?;
1064 }
1065 Ok(())
1066}
1067fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
1068 let len = match b {
1069 [b'2', b'5', b'0'..=b'5', ..] => 3,
1070 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
1071 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
1072 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
1073 [b'0'..=b'9', ..] => 1,
1074 _ => return Err(ParseRIError::InvalidDecOctet),
1075 };
1076 *b = &b[len..];
1077 Ok(())
1078}
1079
1080fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
1087 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
1090 Ok(())
1091}
1092
1093fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
1100 let pos = b
1101 .iter()
1102 .position(|&b| !b.is_ascii_digit())
1103 .unwrap_or(b.len());
1104 *b = &b[pos..];
1105 Ok(())
1106}
1107
1108fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1115 while let Some(rem) = b.strip_prefix(b"/") {
1116 *b = rem;
1117 parse_segment(b)?;
1118 }
1119 Ok(())
1120}
1121
1122fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
1129 *b = b
1130 .strip_prefix(b"/")
1131 .ok_or(ParseRIError::InvalidPathAbsolute)?;
1132 if parse_segment_nz(b).is_ok() {
1133 while let Some(rem) = b.strip_prefix(b"/") {
1134 *b = rem;
1135 parse_segment(b)?;
1136 }
1137 }
1138 Ok(())
1139}
1140
1141fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1148 parse_segment_nz_nc(b)?;
1149 while let Some(rem) = b.strip_prefix(b"/") {
1150 *b = rem;
1151 parse_segment(b)?;
1152 }
1153 Ok(())
1154}
1155
1156fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1163 parse_segment_nz(b)?;
1164 while let Some(rem) = b.strip_prefix(b"/") {
1165 *b = rem;
1166 parse_segment(b)?;
1167 }
1168 Ok(())
1169}
1170
1171fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1189 while parse_pchar(b).is_ok() {}
1190 Ok(())
1191}
1192
1193fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1200 parse_pchar(b)?;
1201 while parse_pchar(b).is_ok() {}
1202 Ok(())
1203}
1204
1205fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1213 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1214 return Err(ParseRIError::InvalidSegmentNzNc);
1215 }
1216 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1217 Ok(())
1218}
1219
1220fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1227 if b.is_empty() {
1228 return Err(ParseRIError::InvalidPChar);
1229 }
1230
1231 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1232 *b = &b[1..];
1233 Ok(())
1234 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1235 *b = &b[3..];
1236 Ok(())
1237 } else {
1238 Err(ParseRIError::InvalidPChar)
1239 }
1240}
1241
1242fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1249 loop {
1250 if let Some(rem) = b.strip_prefix(b"/") {
1251 *b = rem;
1252 } else if let Some(rem) = b.strip_prefix(b"?") {
1253 *b = rem;
1254 } else if parse_pchar(b).is_ok() {
1255 } else {
1257 break Ok(());
1258 }
1259 }
1260}
1261
1262fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1269 loop {
1270 if let Some(rem) = b.strip_prefix(b"/") {
1271 *b = rem;
1272 } else if let Some(rem) = b.strip_prefix(b"?") {
1273 *b = rem;
1274 } else if parse_pchar(b).is_ok() {
1275 } else {
1277 break Ok(());
1278 }
1279 }
1280}
1281
1282fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1289 parse_relative_part(b)?;
1290 if let Some(query) = b.strip_prefix(b"?") {
1291 *b = query;
1292 parse_query(b)?;
1293 }
1294 if let Some(fragment) = b.strip_prefix(b"#") {
1295 *b = fragment;
1296 parse_fragment(b)?;
1297 }
1298 Ok(())
1299}
1300
1301fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1311 if let Some(rem) = b.strip_prefix(b"/") {
1312 if let Some(rem) = rem.strip_prefix(b"/") {
1313 *b = rem;
1314 parse_authority(b)?;
1315 parse_path_abempty(b)
1316 } else {
1317 parse_path_absolute(b)
1318 }
1319 } else {
1320 let orig = b.len();
1321 let ret = parse_path_noscheme(b);
1322 if orig == b.len() { Ok(()) } else { ret }
1324 }
1325}
1326
1327fn is_reserved(b: u8) -> bool {
1334 is_gen_delims(b) || is_sub_delims(b)
1335}
1336
/// Returns `true` for the general delimiters `:` `/` `?` `#` `[` `]` `@`.
fn is_gen_delims(b: u8) -> bool {
    b":/?#[]@".contains(&b)
}
1346
/// Returns `true` for the sub-delimiters
/// `!` `$` `&` `'` `(` `)` `*` `+` `,` `;` `=`.
fn is_sub_delims(b: u8) -> bool {
    b"!$&'()*+,;=".contains(&b)
}
1359
/// Returns `true` for unreserved bytes: ASCII letters and digits plus
/// `-` `.` `_` `~`.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1369
/// Percent-escape lookup table: entry `i` (bytes `3 * i .. 3 * i + 3`) holds
/// `%XX`, where `XX` is `i` in upper-case hexadecimal.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut table = [0u8; 256 * 3];
    let mut byte = 0usize;
    while byte < 256 {
        let slot = byte * 3;
        table[slot] = b'%';
        table[slot + 1] = HEX_DIGITS[byte >> 4];
        table[slot + 2] = HEX_DIGITS[byte & 0xF];
        byte += 1;
    }
    table
};
/// `LUT_BYTES` viewed as a string, so `%XX` escapes can be appended to a
/// `String` as sub-slices.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` is filled only with `%` and the hex digits
    // `0-9`/`A-F`, all ASCII, so it is valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1392
1393pub fn escape(s: &str) -> Cow<'_, str> {
1394 escape_except(s, |_| false)
1395}
1396
1397pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1398 escape_bytes_except(b, |_| false)
1399}
1400
1401pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1402 let cap = s
1403 .chars()
1404 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1405 .sum::<usize>();
1406 if cap == 0 {
1407 return Cow::Borrowed(s);
1408 }
1409 let mut encode = [0; 6];
1410 let mut buf = String::with_capacity(s.len() + cap);
1411 for c in s.chars() {
1412 if is_except(c) {
1413 buf.push(c);
1414 } else {
1415 let encoded = c.encode_utf8(&mut encode);
1416 for b in encoded.bytes() {
1417 let index = b as usize * 3;
1418 buf.push_str(&LUT[index..index + 3]);
1419 }
1420 }
1421 }
1422 Cow::Owned(buf)
1423}
1424
1425pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1426 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1427 if cap == 0 {
1428 return Cow::Borrowed(b);
1429 }
1430 let mut buf = Vec::with_capacity(b.len() + cap);
1431 for &b in b {
1432 if is_except(b) {
1433 buf.push(b);
1434 } else {
1435 let index = b as usize * 3;
1436 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1437 }
1438 }
1439 Cow::Owned(buf)
1440}
1441
/// Error returned by [`unescape`] and [`unescape_bytes`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URIUnescapeError {
    /// A `%` is not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8 (only from [`unescape`]).
    Utf8Error(std::str::Utf8Error),
}

impl From<std::str::Utf8Error> for URIUnescapeError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8Error(value)
    }
}

/// Decodes the two hexadecimal digits at the start of `chunk` (the bytes that
/// directly follow a `%`) into the byte they denote.
///
/// Only `0-9`, `a-f` and `A-F` are accepted; signs, multi-byte characters and
/// chunks shorter than two bytes yield `URIUnescapeError::InvalidEscape`.
fn decode_escaped_octet(chunk: &[u8]) -> Result<u8, URIUnescapeError> {
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            _ => None,
        }
    }

    match chunk {
        [hi, lo, ..] => match (hex_value(*hi), hex_value(*lo)) {
            (Some(hi), Some(lo)) => Ok((hi << 4) | lo),
            _ => Err(URIUnescapeError::InvalidEscape),
        },
        _ => Err(URIUnescapeError::InvalidEscape),
    }
}

/// Decodes every `%XX` escape in `s`.
///
/// Consecutive escapes are decoded as one byte run that must be valid UTF-8.
/// The input is returned borrowed when it contains no `%`.
///
/// # Errors
///
/// * `URIUnescapeError::InvalidEscape` when a `%` is not followed by two
///   hexadecimal digits;
/// * `URIUnescapeError::Utf8Error` when a run of decoded bytes is not valid
///   UTF-8.
pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
    if !s.contains('%') {
        return Ok(Cow::Borrowed(s));
    }

    let mut split = s.split('%');
    let mut buf = String::with_capacity(s.len());
    buf.push_str(split.next().unwrap_or_default());
    // Bytes decoded from consecutive escapes, not yet validated as UTF-8.
    let mut pending = vec![];
    for chunk in split {
        pending.push(decode_escaped_octet(chunk.as_bytes())?);

        // Both digits were checked to be ASCII, so byte offset 2 is a
        // character boundary and this slice cannot panic.
        let literal = &chunk[2..];
        if !literal.is_empty() {
            buf.push_str(from_utf8(&pending)?);
            buf.push_str(literal);
            pending.clear();
        }
    }

    if !pending.is_empty() {
        buf.push_str(from_utf8(&pending)?);
    }
    Ok(Cow::Owned(buf))
}

/// Decodes every `%XX` escape in `b`; all other bytes are copied unchanged.
///
/// The input is returned borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns `URIUnescapeError::InvalidEscape` when a `%` is not followed by
/// two hexadecimal digits.
pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
    if !b.contains(&b'%') {
        return Ok(Cow::Borrowed(b));
    }

    let mut split = b.split(|&byte| byte == b'%');
    let mut buf = Vec::with_capacity(b.len());
    buf.extend_from_slice(split.next().unwrap_or_default());

    for chunk in split {
        buf.push(decode_escaped_octet(chunk)?);
        // Keep the literal bytes that follow the two hex digits.
        buf.extend_from_slice(&chunk[2..]);
    }
    Ok(Cow::Owned(buf))
}
1511
/// Position of the `Components` iterator within the URI: each variant names
/// the component the iterator will try to read next.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    /// Try to read `scheme ":"`.
    Scheme,
    /// Try to read `"//" authority`.
    Authority,
    /// Try to read the leading `/` of the path.
    Root,
    /// Read path segments; one is yielded per visit until no `/` follows.
    Path,
    /// Try to read `"?" query`.
    Query,
    /// Try to read `"#" fragment`.
    Fragment,
    /// Everything has been yielded.
    Finish,
}
1522
1523pub struct Components<'a> {
1524 state: DecomposeState,
1525 uri: &'a str,
1526}
1527
1528impl Components<'_> {
1529 fn new(uri: &str) -> Components<'_> {
1530 Components {
1531 state: DecomposeState::Scheme,
1532 uri,
1533 }
1534 }
1535}
1536
1537impl<'a> Iterator for Components<'a> {
1538 type Item = Component<'a>;
1539
1540 fn next(&mut self) -> Option<Self::Item> {
1541 use DecomposeState::*;
1542 loop {
1543 match self.state {
1544 Scheme => {
1545 self.state = Authority;
1546 let mut bytes = self.uri.as_bytes();
1547 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1548 let len = self.uri.len() - bytes.len();
1549 let (scheme, rem) = self.uri.split_at(len);
1550 self.uri = &rem[1..];
1551 break Some(Component::Scheme(scheme));
1552 }
1553 }
1554 Authority => {
1555 self.state = Root;
1556 if let Some(rem) = self.uri.strip_prefix("//") {
1557 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1558 let (mut authority, rem) = rem.split_at(pos);
1559 self.uri = rem;
1560 let mut userinfo = None;
1561 if let Some((ui, rem)) = authority.split_once('@') {
1562 userinfo = Some(ui);
1563 authority = rem;
1564 }
1565 let mut port = None;
1566 if let Some((host, p)) = authority.rsplit_once(':')
1567 && p.bytes().all(|b| b.is_ascii_digit())
1568 {
1569 port = Some(p);
1570 authority = host;
1571 }
1572 break Some(Component::Authority {
1573 userinfo,
1574 host: authority,
1575 port,
1576 });
1577 }
1578 }
1579 Root => {
1580 self.state = Path;
1581 if let Some(rem) = self.uri.strip_prefix('/') {
1582 self.uri = rem;
1583 break Some(Component::RootSegment);
1584 }
1585 }
1586 Path => {
1587 let pos = self
1588 .uri
1589 .bytes()
1590 .position(|b| b == b'/' || b == b'?' || b == b'#')
1591 .unwrap_or(self.uri.len());
1592 let (segment, rem) = self.uri.split_at(pos);
1593 if let Some(rem) = rem.strip_prefix('/') {
1594 self.uri = rem;
1595 } else {
1596 self.uri = rem;
1597 self.state = Query;
1598 }
1599 break Some(Component::Segment(segment));
1600 }
1601 Query => {
1602 self.state = Fragment;
1603 if let Some(rem) = self.uri.strip_prefix('?') {
1604 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1605 let (query, rem) = rem.split_at(pos);
1606 self.uri = rem;
1607 break Some(Component::Query(query));
1608 }
1609 }
1610 Fragment => {
1611 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1612 self.state = Finish;
1613 if !self.uri.is_empty() {
1614 let (_, frag) = self.uri.split_at(1);
1615 self.uri = "";
1616 break Some(Component::Fragment(frag));
1617 }
1618 }
1619 Finish => break None,
1620 }
1621 }
1622 }
1623}
1624
/// One component of a URI reference, as yielded by `Components`.
///
/// All string slices borrow from the URI being decomposed and exclude the
/// delimiters that introduce them (`:`, `//`, `/`, `?`, `#`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, split into its parts.
    Authority {
        /// The text before `@`, when present.
        userinfo: Option<&'a str>,
        /// The host.
        host: &'a str,
        /// The all-digit text after the last `:`, when present.
        port: Option<&'a str>,
    },
    /// The leading `/` of a rooted path.
    RootSegment,
    /// One path segment (may be empty).
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}