1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// Borrowed, unsized URI reference; the borrowed counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a `&str`
/// to be reinterpreted as a `&URIStr`.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// Underlying text of the URI reference.
    // NOTE(review): presumably always the escaped form produced by
    // `URIString::parse` — confirm no other constructor bypasses validation.
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
38 use Component::*;
39
40 let base = if self.is_absolute() {
41 Cow::Borrowed(self)
42 } else {
43 let mut base = self.to_owned();
44 base.normalize();
45 if let Some(frag) = base.uri.bytes().position(|b| b == b'#') {
46 base.uri.truncate(frag);
47 }
48 assert!(
49 base.is_absolute(),
50 "'{}' is not absolute",
51 base.as_escaped_str()
52 );
53 Cow::Owned(base)
54 };
55
56 let mut ref_components = reference.components().peekable();
57 if ref_components
58 .next_if(|comp| matches!(comp, Scheme(_)))
59 .is_some()
60 {
61 let mut ret = reference.to_owned();
62 ret.normalize();
63 return ret;
64 }
65
66 if ref_components
67 .next_if(|comp| matches!(comp, Authority { .. }))
68 .is_some()
69 {
70 let mut ret = URIString {
72 uri: [base.scheme().unwrap(), ":", &reference.uri].concat(),
73 };
74 ret.normalize();
75 return ret;
76 }
77
78 let mut components = base.components().peekable();
79 let mut uri = String::new();
80 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
81 uri.push_str(scheme);
82 uri.push(':');
83 }
84 if let Some(Authority {
85 userinfo,
86 host,
87 port,
88 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
89 {
90 uri.push_str("//");
91 if let Some(userinfo) = userinfo {
92 uri.push_str(userinfo);
93 uri.push(':');
94 }
95 uri.push_str(host);
96 if let Some(port) = port {
97 uri.push(':');
98 uri.push_str(port);
99 }
100 }
101
102 if ref_components
103 .next_if(|comp| matches!(comp, RootSegment))
104 .is_some()
105 {
106 uri.push_str(&reference.uri);
107 let mut ret = URIString { uri };
108 ret.normalize();
109 return ret;
110 }
111
112 let mut segments = vec![];
113 let has_root = components
114 .next_if(|comp| matches!(comp, RootSegment))
115 .is_some();
116 let mut has_dot_segment = false;
117 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
118 segments.push(segment);
119 has_dot_segment |= segment == "." || segment == "..";
120 }
121 if has_dot_segment {
122 segments = normalize_path_segments(segments.into_iter(), has_root);
123 }
124
125 let mut has_path = false;
126 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
127 let mut buf = vec![segment];
128 while let Some(Segment(segment)) =
129 ref_components.next_if(|comp| matches!(comp, Segment(_)))
130 {
131 buf.push(segment);
132 }
133 if buf.len() > 1 || !buf[0].is_empty() {
134 segments.pop();
135 segments.extend(buf);
136 has_path = true;
137 }
138 }
139 build_normalized_path(segments.into_iter(), has_root, &mut uri);
140
141 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
142 uri.push('?');
143 uri.push_str(query);
144 } else if !has_path
145 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
146 {
147 uri.push('?');
148 uri.push_str(query);
149 }
150
151 if let Some(Fragment(fragment)) = ref_components.next() {
152 uri.push('#');
153 uri.push_str(fragment);
154 }
155
156 URIString { uri }
157 }
158
159 pub fn as_escaped_str(&self) -> &str {
161 &self.uri
162 }
163
164 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
167 unescape(&self.uri).ok()
168 }
169
170 pub fn is_absolute(&self) -> bool {
173 self.scheme().is_some() && self.fragment().is_none()
174 }
175
176 pub fn is_relative(&self) -> bool {
179 self.scheme().is_none()
180 }
181
182 pub fn scheme(&self) -> Option<&str> {
185 let pos = self.uri.bytes().position(is_reserved)?;
186 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
187 }
188
189 pub fn authority(&self) -> Option<&str> {
192 let rem = self
193 .uri
194 .strip_prefix("//")
195 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
196 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
197 }
198
199 pub fn userinfo(&self) -> Option<&str> {
202 Some(self.authority()?.split_once('@')?.0)
203 }
204
205 pub fn host(&self) -> Option<&str> {
208 let mut auth = self.authority()?;
209 if let Some((_userinfo, rem)) = auth.split_once('@') {
210 auth = rem;
211 }
212 if let Some((host, port)) = auth.rsplit_once(':')
213 && port.bytes().all(|b| b.is_ascii_digit())
214 {
215 auth = host;
216 }
217 Some(auth)
218 }
219
220 pub fn port(&self) -> Option<&str> {
223 let (_, port) = self.authority()?.rsplit_once(':')?;
224 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
225 }
226
227 pub fn path(&self) -> &str {
230 let mut path = &self.uri;
231 if let Some(scheme) = self.scheme() {
232 path = &path[scheme.len() + 1..];
234 }
235 if let Some(rem) = path.strip_prefix("//") {
236 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
238 path = &rem[pos..]
239 }
240
241 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
242 }
243
244 pub fn query(&self) -> Option<&str> {
247 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
248 if self.uri.as_bytes()[pos] == b'#' {
249 return None;
250 }
251 let query = &self.uri[pos + 1..];
252 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
253 Some(&query[..pos])
254 }
255
256 pub fn fragment(&self) -> Option<&str> {
259 let pos = self.uri.bytes().position(|b| b == b'#')?;
260 Some(&self.uri[pos + 1..])
261 }
262
263 pub fn components(&self) -> Components<'_> {
265 Components::new(&self.uri)
266 }
267}
268
269impl ToOwned for URIStr {
270 type Owned = URIString;
271
272 fn to_owned(&self) -> Self::Owned {
273 URIString {
274 uri: self.uri.to_owned(),
275 }
276 }
277}
278
279impl From<&URIStr> for URIString {
280 fn from(value: &URIStr) -> Self {
281 value.to_owned()
282 }
283}
284
285impl Clone for Box<URIStr> {
286 fn clone(&self) -> Self {
287 self.as_ref().into()
288 }
289}
290
// Generates `From<&URIStr> for $t<URIStr>` for every pointer type passed to the
// macro (invoked below for `Box`, `Rc` and `Arc`).
macro_rules! impl_boxed_convertion_uri_str {
    ($( $t:ident ),*) => {
        $(
            impl From<&URIStr> for $t<URIStr> {
                fn from(value: &URIStr) -> Self {
                    // First copy the text into a `$t<str>` ...
                    let boxed: $t<str> = value.uri.into();
                    // ... then reinterpret the pointer as `$t<URIStr>`.
                    // SAFETY: `URIStr` is `#[repr(transparent)]` over `str`.
                    // NOTE(review): this additionally assumes `$t<str>` and
                    // `$t<URIStr>` have identical layout — confirm for `Rc`/`Arc`.
                    unsafe {
                        std::mem::transmute(boxed)
                    }
                }
            }
        )*
    };
}
impl_boxed_convertion_uri_str!(Box, Rc, Arc);
309
/// Owned URI reference; the owned counterpart of [`URIStr`].
///
/// Values are created with [`URIString::parse`] or
/// [`URIString::parse_file_path`], or by converting a [`URIStr`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// Text of the URI reference.
    uri: String,
}
325
326impl URIString {
327 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
328 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
329 let uri = escape_except(uri, |b| {
330 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
331 });
332 let mut bytes = uri.as_bytes();
333 parse_uri_reference(&mut bytes)?;
334 if !bytes.is_empty() {
335 Err(ParseRIError::NotTermination)
336 } else {
337 Ok(URIString {
338 uri: uri.into_owned(),
339 })
340 }
341 }
342 _parse(uri.as_ref())
343 }
344
345 pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
351 #[cfg(target_family = "unix")]
352 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
353 let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
354 if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
355 && !path_str.ends_with('/')
356 {
357 path_str.push('/');
358 }
359 if path.is_absolute() {
360 path_str.insert_str(0, "file://");
361 }
362 URIString::parse(path_str)
363 }
364 #[cfg(target_family = "windows")]
365 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
366 use std::path::{Component::*, Prefix::*};
367
368 let mut path_str = String::new();
369 let mut verbatim = false;
370 for comp in path.components() {
371 match comp {
372 Prefix(prefix) => match prefix.kind() {
373 Verbatim(root) => {
374 path_str.push_str("file:///");
375 path_str.push_str(
376 &root
377 .to_str()
378 .ok_or(ParseRIError::Unsupported)?
379 .replace('/', "%2F"),
380 );
381 verbatim = true;
382 }
383 VerbatimUNC(server, root) => {
384 path_str.push_str("file://");
385 path_str.push_str(
386 &server
387 .to_str()
388 .ok_or(ParseRIError::Unsupported)?
389 .replace('/', "%2F"),
390 );
391 path_str.push('/');
392 path_str.push_str(
393 &root
394 .to_str()
395 .ok_or(ParseRIError::Unsupported)?
396 .replace('/', "%2F"),
397 );
398 verbatim = true;
399 }
400 VerbatimDisk(letter) => {
401 path_str.push_str("file:");
402 path_str.push(letter as char);
403 path_str.push(':');
404 verbatim = true;
405 }
406 DeviceNS(device) => {
407 path_str.push_str("file:///");
408 path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
409 }
410 UNC(server, root) => {
411 path_str.push_str("file://");
412 path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
413 path_str.push('/');
414 path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
415 }
416 Disk(letter) => {
417 path_str.push_str("file:");
418 path_str.push(letter as char);
419 path_str.push(':');
420 }
421 },
422 RootDir => {}
423 CurDir => path_str.push_str("/."),
424 ParentDir => path_str.push_str("/.."),
425 Normal(segment) => {
426 path_str.push('/');
427 let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
428 if verbatim {
429 path_str.push_str(&segment.replace('/', "%2F"));
430 } else {
431 path_str.push_str(segment);
432 }
433 }
434 }
435 }
436 if (path.is_dir()
437 || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
438 || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
439 && !path_str.ends_with('/')
440 {
441 path_str.push('/');
442 }
443 URIString::parse(path_str)
444 }
445 #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
446 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
447 todo!()
448 }
449 _parse_file_path(path.as_ref())
450 }
451
452 pub fn into_boxed_uri_str(self) -> Box<URIStr> {
453 Box::from(self.as_ref())
454 }
455
456 pub fn normalize(&mut self) {
459 use Component::*;
460
461 let mut uri = String::with_capacity(self.uri.len());
462 let mut paths = vec![];
463 let mut query = None;
464 let mut fragment = None;
465 let mut has_root = false;
466 for comp in self.components() {
467 match comp {
468 Scheme(scheme) => {
469 uri.push_str(&scheme.to_ascii_lowercase());
470 uri.push(':');
471 }
472 Authority {
473 userinfo,
474 host,
475 port,
476 } => {
477 uri.push_str("//");
478 if let Some(userinfo) = userinfo {
479 uri.push_str(userinfo);
480 uri.push('@');
481 }
482 uri.push_str(host);
483 if let Some(port) = port {
484 uri.push(':');
485 uri.push_str(port);
486 }
487 }
488 RootSegment => has_root = true,
489 Segment(segment) => paths.push(segment),
490 Query(q) => query = Some(q),
491 Fragment(f) => fragment = Some(f),
492 }
493 }
494 build_normalized_path(paths.into_iter(), has_root, &mut uri);
495 if let Some(query) = query {
496 uri.push('?');
497 uri.push_str(query);
498 }
499 if let Some(fragment) = fragment {
500 uri.push('#');
501 uri.push_str(fragment);
502 }
503 self.uri = uri;
504 }
505}
506
507impl AsRef<URIStr> for URIString {
508 fn as_ref(&self) -> &URIStr {
509 URIStr::new(&self.uri)
510 }
511}
512
513impl Borrow<URIStr> for URIString {
514 fn borrow(&self) -> &URIStr {
515 self.as_ref()
516 }
517}
518
519impl Deref for URIString {
520 type Target = URIStr;
521
522 fn deref(&self) -> &Self::Target {
523 self.as_ref()
524 }
525}
526
// Generates `From<URIString>` for every listed target type by delegating to the
// target's `From<&URIStr>` implementation (invoked below for `Box<URIStr>`,
// `Rc<URIStr>` and `Arc<URIStr>`).
macro_rules! impl_convertion_uri_string {
    ($( $t:ty ),*) => {
        $(
            impl From<URIString> for $t {
                fn from(value: URIString) -> $t {
                    // Borrow as `&URIStr` and convert from the borrowed form.
                    From::from(value.as_ref())
                }
            }
        )*
    };
}
impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
539
540fn build_normalized_path<'a>(
541 segments: impl Iterator<Item = &'a str>,
542 has_root: bool,
543 buffer: &mut String,
544) {
545 let segments = normalize_path_segments(segments, has_root);
546 if has_root {
547 buffer.push('/');
548 }
549 for (i, seg) in segments.into_iter().enumerate() {
550 if i > 0 {
551 buffer.push('/');
552 }
553 buffer.push_str(seg);
554 }
555}
556
/// Removes `.` and `..` segments from a path.
///
/// `..` drops the previous segment when there is one that is not itself `..`;
/// otherwise it is kept for rootless paths and discarded for rooted ones. When
/// the path ends with a dot segment an empty segment is appended, so the
/// rebuilt path keeps its trailing `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut kept: Vec<&'a str> = Vec::new();
    let mut ends_with_dot_segment = false;
    for segment in segments {
        ends_with_dot_segment = matches!(segment, "." | "..");
        match segment {
            "." => {}
            ".." => {
                let can_pop = matches!(kept.last(), Some(&previous) if previous != "..");
                if can_pop {
                    kept.pop();
                } else if !has_root {
                    kept.push(segment);
                }
            }
            other => kept.push(other),
        }
    }

    if ends_with_dot_segment {
        kept.push("");
    }

    kept
}
586
587fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
594 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
595 parse_relative_ref(b)
598 } else {
599 if !b[0].is_ascii_alphabetic() {
603 parse_relative_ref(b)
606 } else {
607 if let Some(&c) = b
611 .iter()
612 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
613 && c == b':'
614 {
615 parse_uri(b)
616 } else {
617 parse_relative_ref(b)
618 }
619 }
620 }
621}
622
623fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
630 parse_scheme(b)?;
631 *b = b
632 .strip_prefix(b":")
633 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
634 parse_hier_part(b)?;
635 if let Some(query) = b.strip_prefix(b"?") {
636 *b = query;
637 parse_query(b)?;
638 }
639 if let Some(fragment) = b.strip_prefix(b"#") {
640 *b = fragment;
641 parse_fragment(b)?;
642 }
643 Ok(())
644}
645
646fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
653 if b.is_empty() || !b[0].is_ascii_alphabetic() {
654 return Err(ParseRIError::InvalidScheme);
655 }
656 let pos = b
657 .iter()
658 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
659 .unwrap_or(b.len());
660 *b = &b[pos..];
661 Ok(())
662}
663
664fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
674 if let Some(rem) = b.strip_prefix(b"/") {
675 if let Some(rem) = rem.strip_prefix(b"/") {
678 *b = rem;
682 parse_authority(b)?;
683 parse_path_abempty(b)
684 } else {
685 parse_path_absolute(b)
688 }
689 } else {
690 let mut dum = *b;
692 if parse_pchar(&mut dum).is_ok() {
693 parse_path_rootless(b)
695 } else {
696 Ok(())
700 }
701 }
702}
703
704fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
711 if b.starts_with(b"[") {
712 parse_ip_literal(b)?;
714 if let Some(rem) = b.strip_prefix(b":") {
715 *b = rem;
716 parse_port(b)?;
717 }
718 return Ok(());
719 }
720
721 let mut colon = usize::MAX;
747 let mut now = 0;
748 let mut t = *b;
749 while !t.is_empty() {
750 let pos = t
751 .iter()
752 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
753 .unwrap_or(t.len());
754 t = &t[pos..];
755 now += pos;
756 if let Some(rem) = t.strip_prefix(b":") {
757 now += 1;
758 t = rem;
759 colon = colon.min(now);
760 } else {
761 break;
762 }
763 }
764
765 debug_assert_eq!(now, b.len() - t.len());
766
767 if let Some(rem) = t.strip_prefix(b"@") {
768 *b = rem;
769 parse_host(b)?;
770 if let Some(rem) = b.strip_prefix(b":") {
771 *b = rem;
772 parse_port(b)?;
773 }
774 Ok(())
775 } else if t.starts_with(b"[") {
776 Err(ParseRIError::InvalidAuthority)
777 } else if colon < usize::MAX {
778 *b = &b[colon + 1..];
779 parse_port(b)
780 } else {
781 *b = t;
782 Ok(())
783 }
784}
785
786fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
804 if b.starts_with(b"[") {
805 parse_ip_literal(b)
806 } else {
807 parse_reg_name(b)
809 }
810}
811
812fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
819 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
820 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
821 parse_ipv_future(b)?;
822 } else {
823 parse_ipv6_address(b)?;
824 }
825 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
826 Ok(())
827}
828
829fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
836 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
837 return Err(ParseRIError::InvalidIPvFuture);
838 }
839 *b = &b[1..];
840 let pos = b
841 .iter()
842 .position(|&b| !b.is_ascii_hexdigit())
843 .unwrap_or(b.len());
844 if !(1..=b.len() - 2).contains(&pos) {
845 return Err(ParseRIError::InvalidIPvFuture);
846 }
847 *b = &b[pos..];
848 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
849 let pos = b
850 .iter()
851 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
852 .unwrap_or(b.len());
853 if pos == 0 {
854 return Err(ParseRIError::InvalidIPvFuture);
855 }
856 *b = &b[pos..];
857 Ok(())
858}
859
860fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
879 let mut cnt = 1;
880 let mut omit = false;
881 if let Some(rem) = b.strip_prefix(b":") {
882 *b = rem;
883 omit = true;
884 } else {
885 parse_h16(b)?;
886 }
887
888 while cnt + (omit as i32) < 8
889 && let Some(rem) = b.strip_prefix(b":")
890 {
891 *b = rem;
892 if b.starts_with(b":") {
893 if omit {
894 return Err(ParseRIError::InvalidIPv6address);
895 }
896 omit = true;
897 cnt += 1;
898 continue;
899 }
900
901 let mut dum = *b;
909 if parse_ipv4_address(&mut dum).is_ok() {
910 *b = dum;
911 cnt += 2;
913 break;
915 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
916 parse_h16(b)?;
917 }
918 }
919
920 if (omit && cnt <= 8) || (!omit && cnt == 8) {
923 Ok(())
924 } else {
925 Err(ParseRIError::InvalidIPv6address)
926 }
927}
928
929fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
937 let pos = b
938 .iter()
939 .position(|&b| !b.is_ascii_hexdigit())
940 .unwrap_or(b.len());
941 if pos == 0 {
942 Err(ParseRIError::InvalidH16)
943 } else {
944 *b = &b[pos.min(4)..];
945 Ok(())
946 }
947}
948
949fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
961 parse_dec_octet(b)?;
962 for _ in 0..3 {
963 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
964 parse_dec_octet(b)?;
965 }
966 Ok(())
967}
968fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
969 let len = match b {
970 [b'2', b'5', b'0'..=b'5', ..] => 3,
971 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
972 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
973 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
974 [b'0'..=b'9', ..] => 1,
975 _ => return Err(ParseRIError::InvalidDecOctet),
976 };
977 *b = &b[len..];
978 Ok(())
979}
980
981fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
988 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
991 Ok(())
992}
993
994fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
1001 let pos = b
1002 .iter()
1003 .position(|&b| !b.is_ascii_digit())
1004 .unwrap_or(b.len());
1005 *b = &b[pos..];
1006 Ok(())
1007}
1008
1009fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1016 while let Some(rem) = b.strip_prefix(b"/") {
1017 *b = rem;
1018 parse_segment(b)?;
1019 }
1020 Ok(())
1021}
1022
1023fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
1030 *b = b
1031 .strip_prefix(b"/")
1032 .ok_or(ParseRIError::InvalidPathAbsolute)?;
1033 if parse_segment_nz(b).is_ok() {
1034 while let Some(rem) = b.strip_prefix(b"/") {
1035 *b = rem;
1036 parse_segment(b)?;
1037 }
1038 }
1039 Ok(())
1040}
1041
1042fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1049 parse_segment_nz_nc(b)?;
1050 while let Some(rem) = b.strip_prefix(b"/") {
1051 *b = rem;
1052 parse_segment(b)?;
1053 }
1054 Ok(())
1055}
1056
1057fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1064 parse_segment_nz(b)?;
1065 while let Some(rem) = b.strip_prefix(b"/") {
1066 *b = rem;
1067 parse_segment(b)?;
1068 }
1069 Ok(())
1070}
1071
1072fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1090 while parse_pchar(b).is_ok() {}
1091 Ok(())
1092}
1093
1094fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1101 parse_pchar(b)?;
1102 while parse_pchar(b).is_ok() {}
1103 Ok(())
1104}
1105
1106fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1114 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1115 return Err(ParseRIError::InvalidSegmentNzNc);
1116 }
1117 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1118 Ok(())
1119}
1120
1121fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1128 if b.is_empty() {
1129 return Err(ParseRIError::InvalidPChar);
1130 }
1131
1132 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1133 *b = &b[1..];
1134 Ok(())
1135 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1136 *b = &b[3..];
1137 Ok(())
1138 } else {
1139 Err(ParseRIError::InvalidPChar)
1140 }
1141}
1142
1143fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1150 loop {
1151 if let Some(rem) = b.strip_prefix(b"/") {
1152 *b = rem;
1153 } else if let Some(rem) = b.strip_prefix(b"?") {
1154 *b = rem;
1155 } else if parse_pchar(b).is_ok() {
1156 } else {
1158 break Ok(());
1159 }
1160 }
1161}
1162
1163fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1170 loop {
1171 if let Some(rem) = b.strip_prefix(b"/") {
1172 *b = rem;
1173 } else if let Some(rem) = b.strip_prefix(b"?") {
1174 *b = rem;
1175 } else if parse_pchar(b).is_ok() {
1176 } else {
1178 break Ok(());
1179 }
1180 }
1181}
1182
1183fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1190 parse_relative_part(b)?;
1191 if let Some(query) = b.strip_prefix(b"?") {
1192 *b = query;
1193 parse_query(b)?;
1194 }
1195 if let Some(fragment) = b.strip_prefix(b"#") {
1196 *b = fragment;
1197 parse_fragment(b)?;
1198 }
1199 Ok(())
1200}
1201
1202fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1212 if let Some(rem) = b.strip_prefix(b"/") {
1213 if let Some(rem) = rem.strip_prefix(b"/") {
1214 *b = rem;
1215 parse_authority(b)?;
1216 parse_path_abempty(b)
1217 } else {
1218 parse_path_absolute(b)
1219 }
1220 } else {
1221 let orig = b.len();
1222 let ret = parse_path_noscheme(b);
1223 if orig == b.len() { Ok(()) } else { ret }
1225 }
1226}
1227
1228fn is_reserved(b: u8) -> bool {
1235 is_gen_delims(b) || is_sub_delims(b)
1236}
1237
/// Returns `true` for the general delimiters `:` `/` `?` `#` `[` `]` `@`.
fn is_gen_delims(b: u8) -> bool {
    const GEN_DELIMS: &[u8] = b":/?#[]@";
    GEN_DELIMS.contains(&b)
}
1247
/// Returns `true` for the sub-delimiters `!` `$` `&` `'` `(` `)` `*` `+` `,`
/// `;` `=`.
fn is_sub_delims(b: u8) -> bool {
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";
    SUB_DELIMS.contains(&b)
}
1260
/// Returns `true` for unreserved characters: ASCII letters, ASCII digits, `-`,
/// `.`, `_` and `~`.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1270
/// Lookup table holding the percent-encoded form of every byte value: entry
/// `i` occupies bytes `3 * i .. 3 * i + 3` and reads `%XX` with uppercase
/// hexadecimal digits.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // Every entry starts with '%'; only the two digits need to be filled in.
    let mut table = [b'%'; 256 * 3];
    let mut byte = 0usize;
    while byte < 256 {
        table[3 * byte + 1] = HEX_UPPER[byte >> 4];
        table[3 * byte + 2] = HEX_UPPER[byte & 0xF];
        byte += 1;
    }
    table
};
/// String view of `LUT_BYTES`, used when appending escapes to a `String`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` is built exclusively from `%` and ASCII hexadecimal
    // digits, so it is valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1293
1294pub fn escape(s: &str) -> Cow<'_, str> {
1295 escape_except(s, |_| false)
1296}
1297
1298pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1299 escape_bytes_except(b, |_| false)
1300}
1301
1302pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1303 let cap = s
1304 .chars()
1305 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1306 .sum::<usize>();
1307 if cap == 0 {
1308 return Cow::Borrowed(s);
1309 }
1310 let mut encode = [0; 6];
1311 let mut buf = String::with_capacity(s.len() + cap);
1312 for c in s.chars() {
1313 if is_except(c) {
1314 buf.push(c);
1315 } else {
1316 let encoded = c.encode_utf8(&mut encode);
1317 for b in encoded.bytes() {
1318 let index = b as usize * 3;
1319 buf.push_str(&LUT[index..index + 3]);
1320 }
1321 }
1322 }
1323 Cow::Owned(buf)
1324}
1325
1326pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1327 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1328 if cap == 0 {
1329 return Cow::Borrowed(b);
1330 }
1331 let mut buf = Vec::with_capacity(b.len() + cap);
1332 for &b in b {
1333 if is_except(b) {
1334 buf.push(b);
1335 } else {
1336 let index = b as usize * 3;
1337 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1338 }
1339 }
1340 Cow::Owned(buf)
1341}
1342
/// Error returned by [`unescape`] and [`unescape_bytes`].
#[derive(Debug)]
pub enum URIUnescapeError {
    /// A `%` was not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}

impl From<std::str::Utf8Error> for URIUnescapeError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8Error(value)
    }
}

/// Decodes the two hexadecimal digits at the start of `chunk`, i.e. the text
/// that directly follows a `%`.
fn decode_escape(chunk: &[u8]) -> Result<u8, URIUnescapeError> {
    fn hex_value(digit: u8) -> Option<u8> {
        // `to_digit(16)` yields at most 15, so the narrowing cast is lossless.
        char::from(digit).to_digit(16).map(|value| value as u8)
    }

    match chunk {
        [hi, lo, ..] => match (hex_value(*hi), hex_value(*lo)) {
            (Some(hi), Some(lo)) => Ok((hi << 4) | lo),
            _ => Err(URIUnescapeError::InvalidEscape),
        },
        _ => Err(URIUnescapeError::InvalidEscape),
    }
}

/// Decodes all `%XX` escapes in `s`.
///
/// The input is returned borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns [`URIUnescapeError::InvalidEscape`] when a `%` is not followed by
/// exactly two hexadecimal digits (signs such as `%+1` are rejected), and
/// [`URIUnescapeError::Utf8Error`] when a run of decoded bytes is not valid
/// UTF-8. Malformed input never panics.
pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
    if !s.contains('%') {
        return Ok(Cow::Borrowed(s));
    }

    let mut chunks = s.split('%');
    let mut buf = String::with_capacity(s.len());
    buf.push_str(chunks.next().unwrap_or_default());

    // Bytes of consecutive escapes; they are validated as UTF-8 together
    // because a single character may be spread over several escapes.
    let mut pending = vec![];
    for chunk in chunks {
        pending.push(decode_escape(chunk.as_bytes())?);

        // Both digits are ASCII, so index 2 is a character boundary.
        let literal = &chunk[2..];
        if !literal.is_empty() {
            buf.push_str(from_utf8(&pending)?);
            pending.clear();
            buf.push_str(literal);
        }
    }

    if !pending.is_empty() {
        buf.push_str(from_utf8(&pending)?);
    }
    Ok(Cow::Owned(buf))
}

/// Decodes all `%XX` escapes in `b`, keeping every other byte unchanged.
///
/// The input is returned borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns [`URIUnescapeError::InvalidEscape`] when a `%` is not followed by
/// two hexadecimal digits.
pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
    if !b.contains(&b'%') {
        return Ok(Cow::Borrowed(b));
    }

    let mut chunks = b.split(|&byte| byte == b'%');
    let mut buf = Vec::with_capacity(b.len());
    buf.extend_from_slice(chunks.next().unwrap_or_default());

    for chunk in chunks {
        buf.push(decode_escape(chunk)?);
        // Keep the literal bytes that follow the escape.
        buf.extend_from_slice(&chunk[2..]);
    }
    Ok(Cow::Owned(buf))
}
1412
/// Position of the [`Components`] iterator: each variant names the component
/// the iterator will try to extract next.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    /// Initial state; a scheme is looked for next.
    Scheme,
    /// An authority (`//...`) is looked for next.
    Authority,
    /// The leading `/` of the path is looked for next.
    Root,
    /// Path segments are being emitted.
    Path,
    /// A query (`?...`) is looked for next.
    Query,
    /// A fragment (`#...`) is looked for next.
    Fragment,
    /// Everything has been emitted; the iterator only returns `None`.
    Finish,
}
1423
/// Iterator over the components of a URI reference, created by
/// `URIStr::components`.
pub struct Components<'a> {
    /// Component that is looked for next.
    state: DecomposeState,
    /// Part of the URI that has not been consumed yet.
    uri: &'a str,
}
1428
1429impl Components<'_> {
1430 fn new(uri: &str) -> Components<'_> {
1431 Components {
1432 state: DecomposeState::Scheme,
1433 uri,
1434 }
1435 }
1436}
1437
1438impl<'a> Iterator for Components<'a> {
1439 type Item = Component<'a>;
1440
1441 fn next(&mut self) -> Option<Self::Item> {
1442 use DecomposeState::*;
1443 loop {
1444 match self.state {
1445 Scheme => {
1446 self.state = Authority;
1447 let mut bytes = self.uri.as_bytes();
1448 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1449 let len = self.uri.len() - bytes.len();
1450 let (scheme, rem) = self.uri.split_at(len);
1451 self.uri = &rem[1..];
1452 break Some(Component::Scheme(scheme));
1453 }
1454 }
1455 Authority => {
1456 self.state = Root;
1457 if let Some(rem) = self.uri.strip_prefix("//") {
1458 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1459 let (mut authority, rem) = rem.split_at(pos);
1460 self.uri = rem;
1461 let mut userinfo = None;
1462 if let Some((ui, rem)) = authority.split_once('@') {
1463 userinfo = Some(ui);
1464 authority = rem;
1465 }
1466 let mut port = None;
1467 if let Some((host, p)) = authority.rsplit_once(':')
1468 && p.bytes().all(|b| b.is_ascii_digit())
1469 {
1470 port = Some(p);
1471 authority = host;
1472 }
1473 break Some(Component::Authority {
1474 userinfo,
1475 host: authority,
1476 port,
1477 });
1478 }
1479 }
1480 Root => {
1481 self.state = Path;
1482 if let Some(rem) = self.uri.strip_prefix('/') {
1483 self.uri = rem;
1484 break Some(Component::RootSegment);
1485 }
1486 }
1487 Path => {
1488 let pos = self
1489 .uri
1490 .bytes()
1491 .position(|b| b == b'/' || b == b'?' || b == b'#')
1492 .unwrap_or(self.uri.len());
1493 let (segment, rem) = self.uri.split_at(pos);
1494 if let Some(rem) = rem.strip_prefix('/') {
1495 self.uri = rem;
1496 } else {
1497 self.uri = rem;
1498 self.state = Query;
1499 }
1500 break Some(Component::Segment(segment));
1501 }
1502 Query => {
1503 self.state = Fragment;
1504 if let Some(rem) = self.uri.strip_prefix('?') {
1505 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1506 let (query, rem) = rem.split_at(pos);
1507 self.uri = rem;
1508 break Some(Component::Query(query));
1509 }
1510 }
1511 Fragment => {
1512 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1513 self.state = Finish;
1514 if !self.uri.is_empty() {
1515 let (_, frag) = self.uri.split_at(1);
1516 self.uri = "";
1517 break Some(Component::Fragment(frag));
1518 }
1519 }
1520 Finish => break None,
1521 }
1522 }
1523 }
1524}
1525
/// A single component of a URI reference, as produced by [`Components`].
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, split into its parts (without the leading `//`).
    Authority {
        /// Text before `@`, if present.
        userinfo: Option<&'a str>,
        /// The host; may be empty.
        host: &'a str,
        /// Digits after the last `:`, if present; may be empty.
        port: Option<&'a str>,
    },
    /// Marks a path that starts with `/`.
    RootSegment,
    /// One path segment (the text between two `/`); may be empty.
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}