1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// Borrowed, unsized URI text; the borrowed counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what the pointer
/// casts and transmutes elsewhere in this file rely on.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// The URI text itself.
    /// NOTE(review): presumably always in escaped, already-validated form — confirm.
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
38 use Component::*;
39
40 let base = if self.is_absolute() {
41 Cow::Borrowed(self)
42 } else {
43 let mut base = self.to_owned();
44 base.normalize();
45 if let Some(frag) = base.uri.bytes().position(|b| b == b'#') {
46 base.uri.truncate(frag);
47 }
48 assert!(
49 base.is_absolute(),
50 "'{}' is not absolute",
51 base.as_escaped_str()
52 );
53 Cow::Owned(base)
54 };
55
56 let mut ref_components = reference.components().peekable();
57 if ref_components
58 .next_if(|comp| matches!(comp, Scheme(_)))
59 .is_some()
60 {
61 let mut ret = reference.to_owned();
62 ret.normalize();
63 return ret;
64 }
65
66 if ref_components
67 .next_if(|comp| matches!(comp, Authority { .. }))
68 .is_some()
69 {
70 let mut ret = URIString {
72 uri: [base.scheme().unwrap(), ":", &reference.uri].concat(),
73 };
74 ret.normalize();
75 return ret;
76 }
77
78 let mut components = base.components().peekable();
79 let mut uri = String::new();
80 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
81 uri.push_str(scheme);
82 uri.push(':');
83 }
84 if let Some(Authority {
85 userinfo,
86 host,
87 port,
88 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
89 {
90 uri.push_str("//");
91 if let Some(userinfo) = userinfo {
92 uri.push_str(userinfo);
93 uri.push(':');
94 }
95 uri.push_str(host);
96 if let Some(port) = port {
97 uri.push(':');
98 uri.push_str(port);
99 }
100 }
101
102 if ref_components
103 .next_if(|comp| matches!(comp, RootSegment))
104 .is_some()
105 {
106 uri.push_str(&reference.uri);
107 let mut ret = URIString { uri };
108 ret.normalize();
109 return ret;
110 }
111
112 let mut segments = vec![];
113 let has_root = components
114 .next_if(|comp| matches!(comp, RootSegment))
115 .is_some();
116 let mut has_dot_segment = false;
117 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
118 segments.push(segment);
119 has_dot_segment |= segment == "." || segment == "..";
120 }
121 if has_dot_segment {
122 segments = normalize_path_segments(segments.into_iter(), has_root);
123 }
124
125 let mut has_path = false;
126 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
127 let mut buf = vec![segment];
128 while let Some(Segment(segment)) =
129 ref_components.next_if(|comp| matches!(comp, Segment(_)))
130 {
131 buf.push(segment);
132 }
133 if buf.len() > 1 || !buf[0].is_empty() {
134 segments.pop();
135 segments.extend(buf);
136 has_path = true;
137 }
138 }
139 build_normalized_path(segments.into_iter(), has_root, &mut uri);
140
141 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
142 uri.push('?');
143 uri.push_str(query);
144 } else if !has_path
145 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
146 {
147 uri.push('?');
148 uri.push_str(query);
149 }
150
151 if let Some(Fragment(fragment)) = ref_components.next() {
152 uri.push('#');
153 uri.push_str(fragment);
154 }
155
156 URIString { uri }
157 }
158
    /// Returns the stored URI text as-is, without unescaping anything.
    pub fn as_escaped_str(&self) -> &str {
        &self.uri
    }
163
164 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
167 unescape(&self.uri).ok()
168 }
169
170 pub fn is_absolute(&self) -> bool {
173 self.scheme().is_some() && self.fragment().is_none()
174 }
175
176 pub fn is_relative(&self) -> bool {
179 self.scheme().is_none()
180 }
181
182 pub fn scheme(&self) -> Option<&str> {
185 let pos = self.uri.bytes().position(is_reserved)?;
186 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
187 }
188
189 pub fn authority(&self) -> Option<&str> {
192 let rem = self
193 .uri
194 .strip_prefix("//")
195 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
196 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
197 }
198
199 pub fn userinfo(&self) -> Option<&str> {
202 Some(self.authority()?.split_once('@')?.0)
203 }
204
205 pub fn host(&self) -> Option<&str> {
208 let mut auth = self.authority()?;
209 if let Some((_userinfo, rem)) = auth.split_once('@') {
210 auth = rem;
211 }
212 if let Some((host, port)) = auth.rsplit_once(':')
213 && port.bytes().all(|b| b.is_ascii_digit())
214 {
215 auth = host;
216 }
217 Some(auth)
218 }
219
220 pub fn port(&self) -> Option<&str> {
223 let (_, port) = self.authority()?.rsplit_once(':')?;
224 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
225 }
226
227 pub fn path(&self) -> &str {
230 let mut path = &self.uri;
231 if let Some(scheme) = self.scheme() {
232 path = &path[scheme.len() + 1..];
234 }
235 if let Some(rem) = path.strip_prefix("//") {
236 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
238 path = &rem[pos..]
239 }
240
241 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
242 }
243
244 pub fn query(&self) -> Option<&str> {
247 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
248 if self.uri.as_bytes()[pos] == b'#' {
249 return None;
250 }
251 let query = &self.uri[pos + 1..];
252 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
253 Some(&query[..pos])
254 }
255
256 pub fn fragment(&self) -> Option<&str> {
259 let pos = self.uri.bytes().position(|b| b == b'#')?;
260 Some(&self.uri[pos + 1..])
261 }
262
    /// Returns an iterator that splits the URI text into [`Component`]s
    /// (scheme, authority, root, path segments, query, fragment).
    pub fn components(&self) -> Components<'_> {
        Components::new(&self.uri)
    }
267}
268
269impl ToOwned for URIStr {
270 type Owned = URIString;
271
272 fn to_owned(&self) -> Self::Owned {
273 URIString {
274 uri: self.uri.to_owned(),
275 }
276 }
277}
278
279impl From<&URIStr> for URIString {
280 fn from(value: &URIStr) -> Self {
281 value.to_owned()
282 }
283}
284
/// Identity conversion, so that APIs taking `impl AsRef<URIStr>` accept `&URIStr`.
impl AsRef<URIStr> for URIStr {
    fn as_ref(&self) -> &URIStr {
        self
    }
}
290
291impl Clone for Box<URIStr> {
292 fn clone(&self) -> Self {
293 self.as_ref().into()
294 }
295}
296
297impl std::fmt::Display for URIStr {
298 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
299 write!(
300 f,
301 "{}",
302 self.as_unescaped_str()
303 .as_deref()
304 .unwrap_or(self.as_escaped_str())
305 )
306 }
307}
308
/// Generates `From<&URIStr> for $t<URIStr>` for each listed smart-pointer
/// type by first building a `$t<str>` from the text and then reinterpreting
/// it as `$t<URIStr>`.
macro_rules! impl_boxed_convertion_uri_str {
    ($( $t:ident ),*) => {
        $(
            impl From<&URIStr> for $t<URIStr> {
                fn from(value: &URIStr) -> Self {
                    // Copies the text into a freshly allocated `$t<str>`.
                    let boxed: $t<str> = value.uri.into();
                    // SAFETY: relies on `URIStr` being `#[repr(transparent)]`
                    // over `str`, so `$t<str>` and `$t<URIStr>` are treated as
                    // having the same representation.
                    // NOTE(review): the layout of `Rc`/`Arc` is not formally
                    // guaranteed across pointee types; `into_raw`/`from_raw`
                    // with a pointer cast would avoid relying on it — confirm.
                    unsafe {
                        std::mem::transmute(boxed)
                    }
                }
            }
        )*
    };
}
impl_boxed_convertion_uri_str!(Box, Rc, Arc);
327
/// Owned URI text; dereferences to [`URIStr`] for all read-only accessors.
///
/// Values are created through the `parse*` constructors, which escape the
/// input and validate it with `parse_uri_reference`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// The URI text in its escaped form.
    uri: String,
}
343
344impl URIString {
345 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
358 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
359 let uri = escape_except(uri, |b| {
360 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
361 });
362 URIString::parse_escaped(&uri)
363 }
364 _parse(uri.as_ref())
365 }
366
367 pub fn parse_system_id(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
393 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
394 let uri = escape_except(uri, |b| {
395 b.is_ascii()
397 && !matches!(
398 b as u8,
399 0..=0x1F
400 | 0x20
401 | 0x22
402 | 0x3C
403 | 0x3E
404 | 0x5C
405 | 0x5E
406 | 0x60
407 | 0x7B..=0x7D
408 | 0x7F..
409 )
410 });
411 URIString::parse_escaped(&uri)
412 }
413 _parse(uri.as_ref())
414 }
415
416 fn parse_escaped(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
424 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
425 let mut bytes = uri.as_bytes();
426 parse_uri_reference(&mut bytes)?;
427 if !bytes.is_empty() {
428 Err(ParseRIError::NotTermination)
429 } else {
430 Ok(URIString {
431 uri: uri.to_owned(),
432 })
433 }
434 }
435 _parse(uri.as_ref())
436 }
437
    /// Converts a file-system path into a URI.
    ///
    /// The conversion is platform specific (see the inner `_parse_file_path`
    /// variants) and the resulting text is finally handed to
    /// [`URIString::parse`], which escapes and validates it.
    ///
    /// # Errors
    ///
    /// Returns [`ParseRIError::Unsupported`] when a part of the path is not
    /// valid UTF-8, or whatever [`URIString::parse`] reports.
    ///
    /// # Panics
    ///
    /// On targets that are neither `unix` nor `windows` the implementation is
    /// `todo!()` and panics.
    pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
        // Unix: the path text is used as-is; absolute paths get a "file://" prefix.
        #[cfg(target_family = "unix")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
            // Mark directories with a trailing '/': either the path is an existing
            // directory (this queries the file system) or it ends with a backslash.
            if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            if path.is_absolute() {
                path_str.insert_str(0, "file://");
            }
            URIString::parse(path_str)
        }
        // Windows: rebuild the path component by component using '/' separators.
        #[cfg(target_family = "windows")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            use std::path::{Component::*, Prefix::*};

            let mut path_str = String::new();
            // Set for verbatim (`\\?\`) prefixes; in that case '/' inside a
            // component is written as "%2F" instead of acting as a separator.
            let mut verbatim = false;
            for comp in path.components() {
                match comp {
                    Prefix(prefix) => match prefix.kind() {
                        Verbatim(root) => {
                            path_str.push_str("file:///");
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimUNC(server, root) => {
                            // The server name becomes the URI authority.
                            path_str.push_str("file://");
                            path_str.push_str(
                                &server
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            path_str.push('/');
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimDisk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                            verbatim = true;
                        }
                        DeviceNS(device) => {
                            path_str.push_str("file:///");
                            path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        UNC(server, root) => {
                            // The server name becomes the URI authority.
                            path_str.push_str("file://");
                            path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
                            path_str.push('/');
                            path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        Disk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                        }
                    },
                    // The separator is emitted together with the following component.
                    RootDir => {}
                    CurDir => {
                        if !path_str.is_empty() {
                            path_str.push_str("/.");
                        } else {
                            path_str.push_str(".");
                        }
                    }
                    ParentDir => {
                        if !path_str.is_empty() {
                            path_str.push_str("/..");
                        } else {
                            path_str.push_str("..")
                        }
                    }
                    Normal(segment) => {
                        if !path_str.is_empty() {
                            path_str.push('/');
                        }
                        let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
                        if verbatim {
                            path_str.push_str(&segment.replace('/', "%2F"));
                        } else {
                            path_str.push_str(segment);
                        }
                    }
                }
            }
            // Mark directories with a trailing '/': an existing directory (file
            // system query), a trailing backslash, or - for non-verbatim paths -
            // a trailing forward slash in the original path.
            if (path.is_dir()
                || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
                    || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            URIString::parse(path_str)
        }
        // Other target families are not implemented yet.
        #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            todo!()
        }
        _parse_file_path(path.as_ref())
    }
558
559 pub fn into_boxed_uri_str(self) -> Box<URIStr> {
560 Box::from(self.as_ref())
561 }
562
563 pub fn normalize(&mut self) {
566 use Component::*;
567
568 let mut uri = String::with_capacity(self.uri.len());
569 let mut paths = vec![];
570 let mut query = None;
571 let mut fragment = None;
572 let mut has_root = false;
573 for comp in self.components() {
574 match comp {
575 Scheme(scheme) => {
576 uri.push_str(&scheme.to_ascii_lowercase());
577 uri.push(':');
578 }
579 Authority {
580 userinfo,
581 host,
582 port,
583 } => {
584 uri.push_str("//");
585 if let Some(userinfo) = userinfo {
586 uri.push_str(userinfo);
587 uri.push('@');
588 }
589 uri.push_str(host);
590 if let Some(port) = port {
591 uri.push(':');
592 uri.push_str(port);
593 }
594 }
595 RootSegment => has_root = true,
596 Segment(segment) => paths.push(segment),
597 Query(q) => query = Some(q),
598 Fragment(f) => fragment = Some(f),
599 }
600 }
601 build_normalized_path(paths.into_iter(), has_root, &mut uri);
602 if let Some(query) = query {
603 uri.push('?');
604 uri.push_str(query);
605 }
606 if let Some(fragment) = fragment {
607 uri.push('#');
608 uri.push_str(fragment);
609 }
610 self.uri = uri;
611 }
612}
613
/// Borrows the owned text as a [`URIStr`] without copying.
impl AsRef<URIStr> for URIString {
    fn as_ref(&self) -> &URIStr {
        URIStr::new(&self.uri)
    }
}
619
/// Lets `URIString` be borrowed as a [`URIStr`], e.g. for `Cow<URIStr>`.
impl Borrow<URIStr> for URIString {
    fn borrow(&self) -> &URIStr {
        self.as_ref()
    }
}
625
/// Makes every `URIStr` method available directly on `URIString`.
impl Deref for URIString {
    type Target = URIStr;

    fn deref(&self) -> &Self::Target {
        self.as_ref()
    }
}
633
634impl std::fmt::Display for URIString {
635 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
636 write!(f, "{}", self.as_ref())
637 }
638}
639
/// Generates `From<URIString> for $t` for each listed type by delegating to
/// that type's `From<&URIStr>` conversion.
macro_rules! impl_convertion_uri_string {
    ($( $t:ty ),*) => {
        $(
            impl From<URIString> for $t {
                fn from(value: URIString) -> $t {
                    From::from(value.as_ref())
                }
            }
        )*
    };
}
impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
652
653fn build_normalized_path<'a>(
654 segments: impl Iterator<Item = &'a str>,
655 has_root: bool,
656 buffer: &mut String,
657) {
658 let segments = normalize_path_segments(segments, has_root);
659 if has_root {
660 buffer.push('/');
661 }
662 for (i, seg) in segments.into_iter().enumerate() {
663 if i > 0 {
664 buffer.push('/');
665 }
666 buffer.push_str(seg);
667 }
668}
669
/// Removes `.` and `..` segments from a path given as a sequence of segments.
///
/// * `.` is dropped.
/// * `..` removes the previous segment when there is one that is not itself
///   `..`; otherwise it is kept for rootless paths and dropped for rooted ones.
/// * When the last input segment is `.` or `..`, an empty segment is appended
///   so that the rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut kept: Vec<&'a str> = Vec::new();
    let mut ends_with_dot = false;

    for segment in segments {
        match segment {
            "." => ends_with_dot = true,
            ".." => {
                let can_pop = kept.last().is_some_and(|&previous| previous != "..");
                if can_pop {
                    kept.pop();
                } else if !has_root {
                    kept.push(segment);
                }
                ends_with_dot = true;
            }
            other => {
                kept.push(other);
                ends_with_dot = false;
            }
        }
    }

    if ends_with_dot {
        kept.push("");
    }
    kept
}
699
700fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
707 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
708 parse_relative_ref(b)
711 } else {
712 if !b[0].is_ascii_alphabetic() {
716 parse_relative_ref(b)
719 } else {
720 if let Some(&c) = b
724 .iter()
725 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
726 && c == b':'
727 {
728 parse_uri(b)
729 } else {
730 parse_relative_ref(b)
731 }
732 }
733 }
734}
735
736fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
743 parse_scheme(b)?;
744 *b = b
745 .strip_prefix(b":")
746 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
747 parse_hier_part(b)?;
748 if let Some(query) = b.strip_prefix(b"?") {
749 *b = query;
750 parse_query(b)?;
751 }
752 if let Some(fragment) = b.strip_prefix(b"#") {
753 *b = fragment;
754 parse_fragment(b)?;
755 }
756 Ok(())
757}
758
759fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
766 if b.is_empty() || !b[0].is_ascii_alphabetic() {
767 return Err(ParseRIError::InvalidScheme);
768 }
769 let pos = b
770 .iter()
771 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
772 .unwrap_or(b.len());
773 *b = &b[pos..];
774 Ok(())
775}
776
777fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
787 if let Some(rem) = b.strip_prefix(b"/") {
788 if let Some(rem) = rem.strip_prefix(b"/") {
791 *b = rem;
795 parse_authority(b)?;
796 parse_path_abempty(b)
797 } else {
798 parse_path_absolute(b)
801 }
802 } else {
803 let mut dum = *b;
805 if parse_pchar(&mut dum).is_ok() {
806 parse_path_rootless(b)
808 } else {
809 Ok(())
813 }
814 }
815}
816
817fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
824 if b.starts_with(b"[") {
825 parse_ip_literal(b)?;
827 if let Some(rem) = b.strip_prefix(b":") {
828 *b = rem;
829 parse_port(b)?;
830 }
831 return Ok(());
832 }
833
834 let mut colon = usize::MAX;
860 let mut now = 0;
861 let mut t = *b;
862 while !t.is_empty() {
863 let pos = t
864 .iter()
865 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
866 .unwrap_or(t.len());
867 t = &t[pos..];
868 now += pos;
869 if let Some(rem) = t.strip_prefix(b":") {
870 now += 1;
871 t = rem;
872 colon = colon.min(now);
873 } else {
874 break;
875 }
876 }
877
878 debug_assert_eq!(now, b.len() - t.len());
879
880 if let Some(rem) = t.strip_prefix(b"@") {
881 *b = rem;
882 parse_host(b)?;
883 if let Some(rem) = b.strip_prefix(b":") {
884 *b = rem;
885 parse_port(b)?;
886 }
887 Ok(())
888 } else if t.starts_with(b"[") {
889 Err(ParseRIError::InvalidAuthority)
890 } else if colon < usize::MAX {
891 *b = &b[colon + 1..];
892 parse_port(b)
893 } else {
894 *b = t;
895 Ok(())
896 }
897}
898
899fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
917 if b.starts_with(b"[") {
918 parse_ip_literal(b)
919 } else {
920 parse_reg_name(b)
922 }
923}
924
925fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
932 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
933 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
934 parse_ipv_future(b)?;
935 } else {
936 parse_ipv6_address(b)?;
937 }
938 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
939 Ok(())
940}
941
942fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
949 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
950 return Err(ParseRIError::InvalidIPvFuture);
951 }
952 *b = &b[1..];
953 let pos = b
954 .iter()
955 .position(|&b| !b.is_ascii_hexdigit())
956 .unwrap_or(b.len());
957 if !(1..=b.len() - 2).contains(&pos) {
958 return Err(ParseRIError::InvalidIPvFuture);
959 }
960 *b = &b[pos..];
961 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
962 let pos = b
963 .iter()
964 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
965 .unwrap_or(b.len());
966 if pos == 0 {
967 return Err(ParseRIError::InvalidIPvFuture);
968 }
969 *b = &b[pos..];
970 Ok(())
971}
972
973fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
992 let mut cnt = 1;
993 let mut omit = false;
994 if let Some(rem) = b.strip_prefix(b":") {
995 *b = rem;
996 omit = true;
997 } else {
998 parse_h16(b)?;
999 }
1000
1001 while cnt + (omit as i32) < 8
1002 && let Some(rem) = b.strip_prefix(b":")
1003 {
1004 *b = rem;
1005 if b.starts_with(b":") {
1006 if omit {
1007 return Err(ParseRIError::InvalidIPv6address);
1008 }
1009 omit = true;
1010 cnt += 1;
1011 continue;
1012 }
1013
1014 let mut dum = *b;
1022 if parse_ipv4_address(&mut dum).is_ok() {
1023 *b = dum;
1024 cnt += 2;
1026 break;
1028 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
1029 parse_h16(b)?;
1030 }
1031 }
1032
1033 if (omit && cnt <= 8) || (!omit && cnt == 8) {
1036 Ok(())
1037 } else {
1038 Err(ParseRIError::InvalidIPv6address)
1039 }
1040}
1041
1042fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
1050 let pos = b
1051 .iter()
1052 .position(|&b| !b.is_ascii_hexdigit())
1053 .unwrap_or(b.len());
1054 if pos == 0 {
1055 Err(ParseRIError::InvalidH16)
1056 } else {
1057 *b = &b[pos.min(4)..];
1058 Ok(())
1059 }
1060}
1061
1062fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
1074 parse_dec_octet(b)?;
1075 for _ in 0..3 {
1076 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
1077 parse_dec_octet(b)?;
1078 }
1079 Ok(())
1080}
1081fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
1082 let len = match b {
1083 [b'2', b'5', b'0'..=b'5', ..] => 3,
1084 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
1085 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
1086 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
1087 [b'0'..=b'9', ..] => 1,
1088 _ => return Err(ParseRIError::InvalidDecOctet),
1089 };
1090 *b = &b[len..];
1091 Ok(())
1092}
1093
1094fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
1101 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
1104 Ok(())
1105}
1106
1107fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
1114 let pos = b
1115 .iter()
1116 .position(|&b| !b.is_ascii_digit())
1117 .unwrap_or(b.len());
1118 *b = &b[pos..];
1119 Ok(())
1120}
1121
1122fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1129 while let Some(rem) = b.strip_prefix(b"/") {
1130 *b = rem;
1131 parse_segment(b)?;
1132 }
1133 Ok(())
1134}
1135
1136fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
1143 *b = b
1144 .strip_prefix(b"/")
1145 .ok_or(ParseRIError::InvalidPathAbsolute)?;
1146 if parse_segment_nz(b).is_ok() {
1147 while let Some(rem) = b.strip_prefix(b"/") {
1148 *b = rem;
1149 parse_segment(b)?;
1150 }
1151 }
1152 Ok(())
1153}
1154
1155fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1162 parse_segment_nz_nc(b)?;
1163 while let Some(rem) = b.strip_prefix(b"/") {
1164 *b = rem;
1165 parse_segment(b)?;
1166 }
1167 Ok(())
1168}
1169
1170fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1177 parse_segment_nz(b)?;
1178 while let Some(rem) = b.strip_prefix(b"/") {
1179 *b = rem;
1180 parse_segment(b)?;
1181 }
1182 Ok(())
1183}
1184
1185fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1203 while parse_pchar(b).is_ok() {}
1204 Ok(())
1205}
1206
1207fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1214 parse_pchar(b)?;
1215 while parse_pchar(b).is_ok() {}
1216 Ok(())
1217}
1218
1219fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1227 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1228 return Err(ParseRIError::InvalidSegmentNzNc);
1229 }
1230 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1231 Ok(())
1232}
1233
1234fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1241 if b.is_empty() {
1242 return Err(ParseRIError::InvalidPChar);
1243 }
1244
1245 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1246 *b = &b[1..];
1247 Ok(())
1248 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1249 *b = &b[3..];
1250 Ok(())
1251 } else {
1252 Err(ParseRIError::InvalidPChar)
1253 }
1254}
1255
1256fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1263 loop {
1264 if let Some(rem) = b.strip_prefix(b"/") {
1265 *b = rem;
1266 } else if let Some(rem) = b.strip_prefix(b"?") {
1267 *b = rem;
1268 } else if parse_pchar(b).is_ok() {
1269 } else {
1271 break Ok(());
1272 }
1273 }
1274}
1275
1276fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1283 loop {
1284 if let Some(rem) = b.strip_prefix(b"/") {
1285 *b = rem;
1286 } else if let Some(rem) = b.strip_prefix(b"?") {
1287 *b = rem;
1288 } else if parse_pchar(b).is_ok() {
1289 } else {
1291 break Ok(());
1292 }
1293 }
1294}
1295
1296fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1303 parse_relative_part(b)?;
1304 if let Some(query) = b.strip_prefix(b"?") {
1305 *b = query;
1306 parse_query(b)?;
1307 }
1308 if let Some(fragment) = b.strip_prefix(b"#") {
1309 *b = fragment;
1310 parse_fragment(b)?;
1311 }
1312 Ok(())
1313}
1314
1315fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1325 if let Some(rem) = b.strip_prefix(b"/") {
1326 if let Some(rem) = rem.strip_prefix(b"/") {
1327 *b = rem;
1328 parse_authority(b)?;
1329 parse_path_abempty(b)
1330 } else {
1331 parse_path_absolute(b)
1332 }
1333 } else {
1334 let orig = b.len();
1335 let ret = parse_path_noscheme(b);
1336 if orig == b.len() { Ok(()) } else { ret }
1338 }
1339}
1340
1341fn is_reserved(b: u8) -> bool {
1348 is_gen_delims(b) || is_sub_delims(b)
1349}
1350
/// Returns `true` for the general delimiters `:`, `/`, `?`, `#`, `[`, `]`, `@`.
fn is_gen_delims(b: u8) -> bool {
    b":/?#[]@".contains(&b)
}
1360
/// Returns `true` for the sub-delimiters `!`, `$`, `&`, `'`, `(`, `)`, `*`,
/// `+`, `,`, `;`, `=`.
fn is_sub_delims(b: u8) -> bool {
    b"!$&'()*+,;=".contains(&b)
}
1373
/// Returns `true` for unreserved characters: ASCII letters, digits, `-`,
/// `.`, `_` and `~`.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1383
/// Percent-escape lookup table: bytes `3 * n .. 3 * n + 3` hold the escape of
/// byte value `n` as `%XX` with upper-case hex digits.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Every entry starts with '%'; only the two digits need to be filled in.
    let mut table = [b'%'; 256 * 3];
    let mut value = 0usize;
    while value < 256 {
        table[3 * value + 1] = HEX_DIGITS[value >> 4];
        table[3 * value + 2] = HEX_DIGITS[value & 0xF];
        value += 1;
    }
    table
};
/// The same table viewed as a string, so escapes can be appended to a `String`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` contains only '%' and ASCII hex digits, which is
    // valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1406
1407pub fn escape(s: &str) -> Cow<'_, str> {
1408 escape_except(s, |_| false)
1409}
1410
1411pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1412 escape_bytes_except(b, |_| false)
1413}
1414
1415pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1416 let cap = s
1417 .chars()
1418 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1419 .sum::<usize>();
1420 if cap == 0 {
1421 return Cow::Borrowed(s);
1422 }
1423 let mut encode = [0; 6];
1424 let mut buf = String::with_capacity(s.len() + cap);
1425 for c in s.chars() {
1426 if is_except(c) {
1427 buf.push(c);
1428 } else {
1429 let encoded = c.encode_utf8(&mut encode);
1430 for b in encoded.bytes() {
1431 let index = b as usize * 3;
1432 buf.push_str(&LUT[index..index + 3]);
1433 }
1434 }
1435 }
1436 Cow::Owned(buf)
1437}
1438
1439pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1440 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1441 if cap == 0 {
1442 return Cow::Borrowed(b);
1443 }
1444 let mut buf = Vec::with_capacity(b.len() + cap);
1445 for &b in b {
1446 if is_except(b) {
1447 buf.push(b);
1448 } else {
1449 let index = b as usize * 3;
1450 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1451 }
1452 }
1453 Cow::Owned(buf)
1454}
1455
/// Error returned by [`unescape`] and [`unescape_bytes`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URIUnescapeError {
    /// A `%` was not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}
1460
1461impl From<std::str::Utf8Error> for URIUnescapeError {
1462 fn from(value: std::str::Utf8Error) -> Self {
1463 Self::Utf8Error(value)
1464 }
1465}
1466
1467pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
1468 if !s.contains('%') {
1469 return Ok(Cow::Borrowed(s));
1470 }
1471
1472 let mut split = s.split('%');
1473 let mut buf = String::with_capacity(s.len());
1474 buf.push_str(split.next().unwrap());
1475 let mut bytes = vec![];
1476 for chunk in split {
1477 if chunk.len() < 2 {
1478 return Err(URIUnescapeError::InvalidEscape);
1479 }
1480 let byte =
1481 u8::from_str_radix(&chunk[..2], 16).map_err(|_| URIUnescapeError::InvalidEscape)?;
1482 bytes.push(byte);
1483
1484 if chunk.len() > 2 {
1485 buf.push_str(from_utf8(&bytes)?);
1486 buf.push_str(&chunk[2..]);
1487 bytes.clear();
1488 }
1489 }
1490
1491 if !bytes.is_empty() {
1492 buf.push_str(from_utf8(&bytes)?);
1493 }
1494 Ok(Cow::Owned(buf))
1495}
1496
1497pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
1498 if !b.contains(&b'%') {
1499 return Ok(Cow::Borrowed(b));
1500 }
1501
1502 let mut split = b.split(|&b| b == b'%');
1503 let mut buf = Vec::with_capacity(b.len());
1504 buf.extend_from_slice(split.next().unwrap());
1505
1506 fn hexdigit_to_byte(hex: u8) -> u8 {
1507 if hex.is_ascii_digit() {
1508 hex - b'0'
1509 } else if hex.is_ascii_uppercase() {
1510 hex - b'A' + 10
1511 } else {
1512 hex - b'a' + 10
1513 }
1514 }
1515 for chunk in split {
1516 if chunk.len() < 2 || !chunk[0].is_ascii_hexdigit() || !chunk[1].is_ascii_hexdigit() {
1517 return Err(URIUnescapeError::InvalidEscape);
1518 }
1519 let hi = hexdigit_to_byte(chunk[0]);
1520 let lo = hexdigit_to_byte(chunk[1]);
1521 buf.push((hi << 4) | lo);
1522 }
1523 Ok(Cow::Owned(buf))
1524}
1525
/// Position of the [`Components`] iterator: the component it will try to
/// extract next. The states are visited in declaration order.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    /// Try to extract `scheme:`.
    Scheme,
    /// Try to extract `//authority`.
    Authority,
    /// Try to extract the leading `/` of the path.
    Root,
    /// Extract path segments; repeated until the path ends.
    Path,
    /// Try to extract `?query`.
    Query,
    /// Try to extract `#fragment`.
    Fragment,
    /// Everything has been consumed.
    Finish,
}
1536
1537pub struct Components<'a> {
1538 state: DecomposeState,
1539 uri: &'a str,
1540}
1541
1542impl Components<'_> {
1543 fn new(uri: &str) -> Components<'_> {
1544 Components {
1545 state: DecomposeState::Scheme,
1546 uri,
1547 }
1548 }
1549}
1550
1551impl<'a> Iterator for Components<'a> {
1552 type Item = Component<'a>;
1553
1554 fn next(&mut self) -> Option<Self::Item> {
1555 use DecomposeState::*;
1556 loop {
1557 match self.state {
1558 Scheme => {
1559 self.state = Authority;
1560 let mut bytes = self.uri.as_bytes();
1561 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1562 let len = self.uri.len() - bytes.len();
1563 let (scheme, rem) = self.uri.split_at(len);
1564 self.uri = &rem[1..];
1565 break Some(Component::Scheme(scheme));
1566 }
1567 }
1568 Authority => {
1569 self.state = Root;
1570 if let Some(rem) = self.uri.strip_prefix("//") {
1571 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1572 let (mut authority, rem) = rem.split_at(pos);
1573 self.uri = rem;
1574 let mut userinfo = None;
1575 if let Some((ui, rem)) = authority.split_once('@') {
1576 userinfo = Some(ui);
1577 authority = rem;
1578 }
1579 let mut port = None;
1580 if let Some((host, p)) = authority.rsplit_once(':')
1581 && p.bytes().all(|b| b.is_ascii_digit())
1582 {
1583 port = Some(p);
1584 authority = host;
1585 }
1586 break Some(Component::Authority {
1587 userinfo,
1588 host: authority,
1589 port,
1590 });
1591 }
1592 }
1593 Root => {
1594 self.state = Path;
1595 if let Some(rem) = self.uri.strip_prefix('/') {
1596 self.uri = rem;
1597 break Some(Component::RootSegment);
1598 }
1599 }
1600 Path => {
1601 let pos = self
1602 .uri
1603 .bytes()
1604 .position(|b| b == b'/' || b == b'?' || b == b'#')
1605 .unwrap_or(self.uri.len());
1606 let (segment, rem) = self.uri.split_at(pos);
1607 if let Some(rem) = rem.strip_prefix('/') {
1608 self.uri = rem;
1609 } else {
1610 self.uri = rem;
1611 self.state = Query;
1612 }
1613 break Some(Component::Segment(segment));
1614 }
1615 Query => {
1616 self.state = Fragment;
1617 if let Some(rem) = self.uri.strip_prefix('?') {
1618 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1619 let (query, rem) = rem.split_at(pos);
1620 self.uri = rem;
1621 break Some(Component::Query(query));
1622 }
1623 }
1624 Fragment => {
1625 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1626 self.state = Finish;
1627 if !self.uri.is_empty() {
1628 let (_, frag) = self.uri.split_at(1);
1629 self.uri = "";
1630 break Some(Component::Fragment(frag));
1631 }
1632 }
1633 Finish => break None,
1634 }
1635 }
1636 }
1637}
1638
/// One syntactic component of a URI, borrowed from the URI text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, without the leading `//`, split into its parts.
    Authority {
        /// Text before `@`, if any.
        userinfo: Option<&'a str>,
        /// The host (may be an IP literal in brackets).
        host: &'a str,
        /// All-digit text after the last `:`, if any.
        port: Option<&'a str>,
    },
    /// Marker for a path that starts with `/`.
    RootSegment,
    /// One path segment, without any `/`.
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}