1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// Borrowed, unsized URI reference; the slice counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a `&str`
/// to be reinterpreted as a `&URIStr` by the private constructor.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    /// The URI text exactly as stored (escaped form).
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
31 use Component::*;
32
33 assert!(
34 self.is_absolute(),
35 "'{}' is not absolute",
36 self.as_escaped_str()
37 );
38
39 let mut ref_components = reference.components().peekable();
40 if ref_components
41 .next_if(|comp| matches!(comp, Scheme(_)))
42 .is_some()
43 {
44 let mut ret = reference.to_owned();
45 ret.normalize();
46 return ret;
47 }
48
49 if ref_components
50 .next_if(|comp| matches!(comp, Authority { .. }))
51 .is_some()
52 {
53 let mut ret = URIString {
55 uri: [self.scheme().unwrap(), ":", &reference.uri].concat(),
56 };
57 ret.normalize();
58 return ret;
59 }
60
61 let mut components = self.components().peekable();
62 let mut uri = String::new();
63 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
64 uri.push_str(scheme);
65 uri.push(':');
66 }
67 if let Some(Authority {
68 userinfo,
69 host,
70 port,
71 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
72 {
73 uri.push_str("//");
74 if let Some(userinfo) = userinfo {
75 uri.push_str(userinfo);
76 uri.push(':');
77 }
78 uri.push_str(host);
79 if let Some(port) = port {
80 uri.push(':');
81 uri.push_str(port);
82 }
83 }
84
85 if ref_components
86 .next_if(|comp| matches!(comp, RootSegment))
87 .is_some()
88 {
89 uri.push_str(&reference.uri);
90 let mut ret = URIString { uri };
91 ret.normalize();
92 return ret;
93 }
94
95 let mut segments = vec![];
96 let has_root = components
97 .next_if(|comp| matches!(comp, RootSegment))
98 .is_some();
99 let mut has_dot_segment = false;
100 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
101 segments.push(segment);
102 has_dot_segment |= segment == "." || segment == "..";
103 }
104 if has_dot_segment {
105 segments = normalize_path_segments(segments.into_iter(), has_root);
106 }
107
108 let mut has_path = false;
109 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
110 let mut buf = vec![segment];
111 while let Some(Segment(segment)) =
112 ref_components.next_if(|comp| matches!(comp, Segment(_)))
113 {
114 buf.push(segment);
115 }
116 if buf.len() > 1 || !buf[0].is_empty() {
117 segments.pop();
118 segments.extend(buf);
119 has_path = true;
120 }
121 }
122 build_normalized_path(segments.into_iter(), has_root, &mut uri);
123
124 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
125 uri.push('?');
126 uri.push_str(query);
127 } else if !has_path
128 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
129 {
130 uri.push('?');
131 uri.push_str(query);
132 }
133
134 if let Some(Fragment(fragment)) = ref_components.next() {
135 uri.push('#');
136 uri.push_str(fragment);
137 }
138
139 URIString { uri }
140 }
141
142 pub fn as_escaped_str(&self) -> &str {
143 &self.uri
144 }
145
146 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
147 unescape(&self.uri).ok()
148 }
149
150 pub fn is_absolute(&self) -> bool {
153 self.scheme().is_some() && self.fragment().is_none()
154 }
155
156 pub fn is_relative(&self) -> bool {
159 self.scheme().is_none()
160 }
161
162 pub fn scheme(&self) -> Option<&str> {
163 let pos = self.uri.bytes().position(is_reserved)?;
164 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
165 }
166
167 pub fn authority(&self) -> Option<&str> {
168 let rem = self
169 .uri
170 .strip_prefix("//")
171 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
172 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
173 }
174
175 pub fn userinfo(&self) -> Option<&str> {
176 Some(self.authority()?.split_once('@')?.0)
177 }
178
179 pub fn host(&self) -> Option<&str> {
180 let mut auth = self.authority()?;
181 if let Some((_userinfo, rem)) = auth.split_once('@') {
182 auth = rem;
183 }
184 if let Some((host, port)) = auth.rsplit_once(':')
185 && port.bytes().all(|b| b.is_ascii_digit())
186 {
187 auth = host;
188 }
189 Some(auth)
190 }
191
192 pub fn port(&self) -> Option<&str> {
193 let (_, port) = self.authority()?.rsplit_once(':')?;
194 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
195 }
196
197 pub fn path(&self) -> &str {
198 let mut path = &self.uri;
199 if let Some(scheme) = self.scheme() {
200 path = &path[scheme.len() + 1..];
202 }
203 if let Some(rem) = path.strip_prefix("//") {
204 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
206 path = &rem[pos..]
207 }
208
209 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
210 }
211
212 pub fn query(&self) -> Option<&str> {
213 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
214 if self.uri.as_bytes()[pos] == b'#' {
215 return None;
216 }
217 let query = &self.uri[pos + 1..];
218 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
219 Some(&query[..pos])
220 }
221
222 pub fn fragment(&self) -> Option<&str> {
223 let pos = self.uri.bytes().position(|b| b == b'#')?;
224 Some(&self.uri[pos + 1..])
225 }
226
227 pub fn components(&self) -> Components<'_> {
228 Components::new(&self.uri)
229 }
230}
231
232impl ToOwned for URIStr {
233 type Owned = URIString;
234
235 fn to_owned(&self) -> Self::Owned {
236 URIString {
237 uri: self.uri.to_owned(),
238 }
239 }
240}
241
242impl From<&URIStr> for URIString {
243 fn from(value: &URIStr) -> Self {
244 value.to_owned()
245 }
246}
247
248impl Clone for Box<URIStr> {
249 fn clone(&self) -> Self {
250 self.as_ref().into()
251 }
252}
253
254macro_rules! impl_boxed_convertion_uri_str {
255 ($( $t:ident ),*) => {
256 $(
257 impl From<&URIStr> for $t<URIStr> {
258 fn from(value: &URIStr) -> Self {
259 let boxed: $t<str> = value.uri.into();
260 unsafe {
261 std::mem::transmute(boxed)
265 }
266 }
267 }
268 )*
269 };
270}
271impl_boxed_convertion_uri_str!(Box, Rc, Arc);
272
/// Owned URI reference; the owning counterpart of [`URIStr`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    /// The URI text in escaped form.
    uri: String,
}
288
289impl URIString {
290 pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
291 fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
292 let uri = escape_except(uri, |b| {
293 b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
294 });
295 let mut bytes = uri.as_bytes();
296 parse_uri_reference(&mut bytes)?;
297 if !bytes.is_empty() {
298 Err(ParseRIError::NotTermination)
299 } else {
300 Ok(URIString {
301 uri: uri.into_owned(),
302 })
303 }
304 }
305 _parse(uri.as_ref())
306 }
307
308 pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
314 #[cfg(target_family = "unix")]
315 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
316 let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
317 if path.is_dir() && !path_str.ends_with("/") {
318 path_str.push('/');
319 }
320 if path.is_absolute() {
321 path_str.insert_str(0, "file://");
322 }
323 URIString::parse(path_str)
324 }
325 #[cfg(not(target_family = "unix"))]
326 fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
327 todo!()
328 }
329 _parse_file_path(path.as_ref())
330 }
331
332 pub fn into_boxed_uri_str(self) -> Box<URIStr> {
333 Box::from(self.as_ref())
334 }
335
336 pub fn normalize(&mut self) {
339 use Component::*;
340
341 let mut uri = String::with_capacity(self.uri.len());
342 let mut paths = vec![];
343 let mut query = None;
344 let mut fragment = None;
345 let mut has_root = false;
346 for comp in self.components() {
347 match comp {
348 Scheme(scheme) => {
349 uri.push_str(&scheme.to_ascii_lowercase());
350 uri.push(':');
351 }
352 Authority {
353 userinfo,
354 host,
355 port,
356 } => {
357 uri.push_str("//");
358 if let Some(userinfo) = userinfo {
359 uri.push_str(userinfo);
360 uri.push('@');
361 }
362 uri.push_str(host);
363 if let Some(port) = port {
364 uri.push(':');
365 uri.push_str(port);
366 }
367 }
368 RootSegment => has_root = true,
369 Segment(segment) => paths.push(segment),
370 Query(q) => query = Some(q),
371 Fragment(f) => fragment = Some(f),
372 }
373 }
374 build_normalized_path(paths.into_iter(), has_root, &mut uri);
375 if let Some(query) = query {
376 uri.push('?');
377 uri.push_str(query);
378 }
379 if let Some(fragment) = fragment {
380 uri.push('#');
381 uri.push_str(fragment);
382 }
383 self.uri = uri;
384 }
385}
386
387impl AsRef<URIStr> for URIString {
388 fn as_ref(&self) -> &URIStr {
389 URIStr::new(&self.uri)
390 }
391}
392
393impl Borrow<URIStr> for URIString {
394 fn borrow(&self) -> &URIStr {
395 self.as_ref()
396 }
397}
398
399impl Deref for URIString {
400 type Target = URIStr;
401
402 fn deref(&self) -> &Self::Target {
403 self.as_ref()
404 }
405}
406
407macro_rules! impl_convertion_uri_string {
408 ($( $t:ty ),*) => {
409 $(
410 impl From<URIString> for $t {
411 fn from(value: URIString) -> $t {
412 From::from(value.as_ref())
413 }
414 }
415 )*
416 };
417}
418impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
419
420fn build_normalized_path<'a>(
421 segments: impl Iterator<Item = &'a str>,
422 has_root: bool,
423 buffer: &mut String,
424) {
425 let segments = normalize_path_segments(segments, has_root);
426 if has_root {
427 buffer.push('/');
428 }
429 for (i, seg) in segments.into_iter().enumerate() {
430 if i > 0 {
431 buffer.push('/');
432 }
433 buffer.push_str(seg);
434 }
435}
436
/// Removes `.` and `..` segments from a path given as a sequence of segments.
///
/// * `.` is dropped.
/// * `..` removes the previously kept segment. When there is nothing to remove
///   (or the previous segment is itself `..`), it is kept for rootless paths
///   and discarded for rooted ones.
/// * When the last input segment was `.` or `..`, an empty segment is appended
///   so that the rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut kept: Vec<&'a str> = Vec::new();
    let mut ends_with_dot = false;

    for segment in segments {
        match segment {
            "." => ends_with_dot = true,
            ".." => {
                ends_with_dot = true;
                // `Some(false)`: there is an ordinary segment to remove.
                let top_is_parent = kept.last().map(|top| *top == "..");
                match top_is_parent {
                    Some(false) => {
                        kept.pop();
                    }
                    _ if !has_root => kept.push(segment),
                    _ => {}
                }
            }
            _ => {
                kept.push(segment);
                ends_with_dot = false;
            }
        }
    }

    if ends_with_dot {
        kept.push("");
    }
    kept
}
466
467fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
474 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
475 parse_relative_ref(b)
478 } else {
479 if !b[0].is_ascii_alphabetic() {
483 parse_relative_ref(b)
486 } else {
487 if let Some(&c) = b
491 .iter()
492 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
493 && c == b':'
494 {
495 parse_uri(b)
496 } else {
497 parse_relative_ref(b)
498 }
499 }
500 }
501}
502
503fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
510 parse_scheme(b)?;
511 *b = b
512 .strip_prefix(b":")
513 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
514 parse_hier_part(b)?;
515 if let Some(query) = b.strip_prefix(b"?") {
516 *b = query;
517 parse_query(b)?;
518 }
519 if let Some(fragment) = b.strip_prefix(b"#") {
520 *b = fragment;
521 parse_fragment(b)?;
522 }
523 Ok(())
524}
525
526fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
533 if b.is_empty() || !b[0].is_ascii_alphabetic() {
534 return Err(ParseRIError::InvalidScheme);
535 }
536 let pos = b
537 .iter()
538 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
539 .unwrap_or(b.len());
540 *b = &b[pos..];
541 Ok(())
542}
543
544fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
554 if let Some(rem) = b.strip_prefix(b"/") {
555 if let Some(rem) = rem.strip_prefix(b"/") {
558 *b = rem;
562 parse_authority(b)?;
563 parse_path_abempty(b)
564 } else {
565 parse_path_absolute(b)
568 }
569 } else {
570 let mut dum = *b;
572 if parse_pchar(&mut dum).is_ok() {
573 parse_path_rootless(b)
575 } else {
576 Ok(())
580 }
581 }
582}
583
584fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
591 if b.starts_with(b"[") {
592 parse_ip_literal(b)?;
594 if let Some(rem) = b.strip_prefix(b":") {
595 *b = rem;
596 parse_port(b)?;
597 }
598 return Ok(());
599 }
600
601 let mut colon = usize::MAX;
627 let mut now = 0;
628 let mut t = *b;
629 while !t.is_empty() {
630 let pos = t
631 .iter()
632 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
633 .unwrap_or(t.len());
634 t = &t[pos..];
635 now += pos;
636 if let Some(rem) = t.strip_prefix(b":") {
637 now += 1;
638 t = rem;
639 colon = colon.min(now);
640 } else {
641 break;
642 }
643 }
644
645 debug_assert_eq!(now, b.len() - t.len());
646
647 if let Some(rem) = t.strip_prefix(b"@") {
648 *b = rem;
649 parse_host(b)?;
650 if let Some(rem) = b.strip_prefix(b":") {
651 *b = rem;
652 parse_port(b)?;
653 }
654 Ok(())
655 } else if t.starts_with(b"[") {
656 Err(ParseRIError::InvalidAuthority)
657 } else if colon < usize::MAX {
658 *b = &b[colon + 1..];
659 parse_port(b)
660 } else {
661 *b = t;
662 Ok(())
663 }
664}
665
666fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
684 if b.starts_with(b"[") {
685 parse_ip_literal(b)
686 } else {
687 parse_reg_name(b)
689 }
690}
691
692fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
699 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
700 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
701 parse_ipv_future(b)?;
702 } else {
703 parse_ipv6_address(b)?;
704 }
705 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
706 Ok(())
707}
708
709fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
716 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
717 return Err(ParseRIError::InvalidIPvFuture);
718 }
719 *b = &b[1..];
720 let pos = b
721 .iter()
722 .position(|&b| !b.is_ascii_hexdigit())
723 .unwrap_or(b.len());
724 if !(1..=b.len() - 2).contains(&pos) {
725 return Err(ParseRIError::InvalidIPvFuture);
726 }
727 *b = &b[pos..];
728 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
729 let pos = b
730 .iter()
731 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
732 .unwrap_or(b.len());
733 if pos == 0 {
734 return Err(ParseRIError::InvalidIPvFuture);
735 }
736 *b = &b[pos..];
737 Ok(())
738}
739
740fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
759 let mut cnt = 1;
760 let mut omit = false;
761 if let Some(rem) = b.strip_prefix(b":") {
762 *b = rem;
763 omit = true;
764 } else {
765 parse_h16(b)?;
766 }
767
768 while cnt + (omit as i32) < 8
769 && let Some(rem) = b.strip_prefix(b":")
770 {
771 *b = rem;
772 if b.starts_with(b":") {
773 if omit {
774 return Err(ParseRIError::InvalidIPv6address);
775 }
776 omit = true;
777 cnt += 1;
778 continue;
779 }
780
781 let mut dum = *b;
789 if parse_ipv4_address(&mut dum).is_ok() {
790 *b = dum;
791 cnt += 2;
793 break;
795 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
796 parse_h16(b)?;
797 }
798 }
799
800 if (omit && cnt <= 8) || (!omit && cnt == 8) {
803 Ok(())
804 } else {
805 Err(ParseRIError::InvalidIPv6address)
806 }
807}
808
809fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
817 let pos = b
818 .iter()
819 .position(|&b| !b.is_ascii_hexdigit())
820 .unwrap_or(b.len());
821 if pos == 0 {
822 Err(ParseRIError::InvalidH16)
823 } else {
824 *b = &b[pos.min(4)..];
825 Ok(())
826 }
827}
828
829fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
841 parse_dec_octet(b)?;
842 for _ in 0..3 {
843 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
844 parse_dec_octet(b)?;
845 }
846 Ok(())
847}
848fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
849 let len = match b {
850 [b'2', b'5', b'0'..=b'5', ..] => 3,
851 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
852 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
853 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
854 [b'0'..=b'9', ..] => 1,
855 _ => return Err(ParseRIError::InvalidDecOctet),
856 };
857 *b = &b[len..];
858 Ok(())
859}
860
861fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
868 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
871 Ok(())
872}
873
874fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
881 let pos = b
882 .iter()
883 .position(|&b| !b.is_ascii_digit())
884 .unwrap_or(b.len());
885 *b = &b[pos..];
886 Ok(())
887}
888
889fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
896 while let Some(rem) = b.strip_prefix(b"/") {
897 *b = rem;
898 parse_segment(b)?;
899 }
900 Ok(())
901}
902
903fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
910 *b = b
911 .strip_prefix(b"/")
912 .ok_or(ParseRIError::InvalidPathAbsolute)?;
913 if parse_segment_nz(b).is_ok() {
914 while let Some(rem) = b.strip_prefix(b"/") {
915 *b = rem;
916 parse_segment(b)?;
917 }
918 }
919 Ok(())
920}
921
922fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
929 parse_segment_nz_nc(b)?;
930 while let Some(rem) = b.strip_prefix(b"/") {
931 *b = rem;
932 parse_segment(b)?;
933 }
934 Ok(())
935}
936
937fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
944 parse_segment_nz(b)?;
945 while let Some(rem) = b.strip_prefix(b"/") {
946 *b = rem;
947 parse_segment(b)?;
948 }
949 Ok(())
950}
951
952fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
970 while parse_pchar(b).is_ok() {}
971 Ok(())
972}
973
974fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
981 parse_pchar(b)?;
982 while parse_pchar(b).is_ok() {}
983 Ok(())
984}
985
986fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
994 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
995 return Err(ParseRIError::InvalidSegmentNzNc);
996 }
997 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
998 Ok(())
999}
1000
1001fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1008 if b.is_empty() {
1009 return Err(ParseRIError::InvalidPChar);
1010 }
1011
1012 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1013 *b = &b[1..];
1014 Ok(())
1015 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1016 *b = &b[3..];
1017 Ok(())
1018 } else {
1019 Err(ParseRIError::InvalidPChar)
1020 }
1021}
1022
1023fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1030 loop {
1031 if let Some(rem) = b.strip_prefix(b"/") {
1032 *b = rem;
1033 } else if let Some(rem) = b.strip_prefix(b"?") {
1034 *b = rem;
1035 } else if parse_pchar(b).is_ok() {
1036 } else {
1038 break Ok(());
1039 }
1040 }
1041}
1042
1043fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1050 loop {
1051 if let Some(rem) = b.strip_prefix(b"/") {
1052 *b = rem;
1053 } else if let Some(rem) = b.strip_prefix(b"?") {
1054 *b = rem;
1055 } else if parse_pchar(b).is_ok() {
1056 } else {
1058 break Ok(());
1059 }
1060 }
1061}
1062
1063fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1070 parse_relative_part(b)?;
1071 if let Some(query) = b.strip_prefix(b"?") {
1072 *b = query;
1073 parse_query(b)?;
1074 }
1075 if let Some(fragment) = b.strip_prefix(b"#") {
1076 *b = fragment;
1077 parse_fragment(b)?;
1078 }
1079 Ok(())
1080}
1081
1082fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1092 if let Some(rem) = b.strip_prefix(b"/") {
1093 if let Some(rem) = rem.strip_prefix(b"/") {
1094 *b = rem;
1095 parse_authority(b)?;
1096 parse_path_abempty(b)
1097 } else {
1098 parse_path_absolute(b)
1099 }
1100 } else {
1101 let orig = b.len();
1102 let ret = parse_path_noscheme(b);
1103 if orig == b.len() { Ok(()) } else { ret }
1105 }
1106}
1107
1108fn is_reserved(b: u8) -> bool {
1115 is_gen_delims(b) || is_sub_delims(b)
1116}
1117
/// Returns `true` for the general delimiters `: / ? # [ ] @`.
fn is_gen_delims(b: u8) -> bool {
    const GEN_DELIMS: &[u8] = b":/?#[]@";
    GEN_DELIMS.contains(&b)
}
1127
/// Returns `true` for the sub-delimiters `! $ & ' ( ) * + , ; =`.
fn is_sub_delims(b: u8) -> bool {
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";
    SUB_DELIMS.contains(&b)
}
1140
/// Returns `true` for unreserved characters: ASCII letters, digits, `-`, `.`,
/// `_` and `~`.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1150
/// Lookup table holding the percent-encoded form of every byte value.
///
/// Entry `i` occupies bytes `3 * i .. 3 * i + 3` and reads `%XY`, where `XY`
/// is `i` written as two upper-case hexadecimal digits.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Every third byte is '%'; the two digits are filled in below.
    let mut table = [b'%'; 256 * 3];
    let mut value = 0usize;
    while value < 256 {
        table[3 * value + 1] = HEX_DIGITS[value >> 4];
        table[3 * value + 2] = HEX_DIGITS[value & 0xF];
        value += 1;
    }
    table
};
/// String view of [`LUT_BYTES`], used to append escapes to a `String`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` is built only from b'%' and the ASCII characters
    // '0'-'9' and 'A'-'F', so it is valid UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1173
1174pub fn escape(s: &str) -> Cow<'_, str> {
1175 escape_except(s, |_| false)
1176}
1177
1178pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1179 escape_bytes_except(b, |_| false)
1180}
1181
1182pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1183 let cap = s
1184 .chars()
1185 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1186 .sum::<usize>();
1187 if cap == 0 {
1188 return Cow::Borrowed(s);
1189 }
1190 let mut encode = [0; 6];
1191 let mut buf = String::with_capacity(s.len() + cap);
1192 for c in s.chars() {
1193 if is_except(c) {
1194 buf.push(c);
1195 } else {
1196 let encoded = c.encode_utf8(&mut encode);
1197 for b in encoded.bytes() {
1198 let index = b as usize * 3;
1199 buf.push_str(&LUT[index..index + 3]);
1200 }
1201 }
1202 }
1203 Cow::Owned(buf)
1204}
1205
1206pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1207 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1208 if cap == 0 {
1209 return Cow::Borrowed(b);
1210 }
1211 let mut buf = Vec::with_capacity(b.len() + cap);
1212 for &b in b {
1213 if is_except(b) {
1214 buf.push(b);
1215 } else {
1216 let index = b as usize * 3;
1217 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1218 }
1219 }
1220 Cow::Owned(buf)
1221}
1222
/// Error returned when percent-decoding fails.
///
/// `Debug` is derived so that results carrying this error can be inspected
/// with `unwrap`/`expect` and in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URIUnescapeError {
    /// A `%` was not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}
1227
1228impl From<std::str::Utf8Error> for URIUnescapeError {
1229 fn from(value: std::str::Utf8Error) -> Self {
1230 Self::Utf8Error(value)
1231 }
1232}
1233
1234pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
1235 if !s.contains('%') {
1236 return Ok(Cow::Borrowed(s));
1237 }
1238
1239 let mut split = s.split('%');
1240 let mut buf = String::with_capacity(s.len());
1241 buf.push_str(split.next().unwrap());
1242 let mut bytes = vec![];
1243 for chunk in split {
1244 if chunk.len() < 2 {
1245 return Err(URIUnescapeError::InvalidEscape);
1246 }
1247 let byte =
1248 u8::from_str_radix(&chunk[..2], 16).map_err(|_| URIUnescapeError::InvalidEscape)?;
1249 bytes.push(byte);
1250
1251 if chunk.len() > 2 {
1252 buf.push_str(from_utf8(&bytes)?);
1253 buf.push_str(&chunk[2..]);
1254 bytes.clear();
1255 }
1256 }
1257
1258 if !bytes.is_empty() {
1259 buf.push_str(from_utf8(&bytes)?);
1260 }
1261 Ok(Cow::Owned(buf))
1262}
1263
1264pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
1265 if !b.contains(&b'%') {
1266 return Ok(Cow::Borrowed(b));
1267 }
1268
1269 let mut split = b.split(|&b| b == b'%');
1270 let mut buf = Vec::with_capacity(b.len());
1271 buf.extend_from_slice(split.next().unwrap());
1272
1273 fn hexdigit_to_byte(hex: u8) -> u8 {
1274 if hex.is_ascii_digit() {
1275 hex - b'0'
1276 } else if hex.is_ascii_uppercase() {
1277 hex - b'A' + 10
1278 } else {
1279 hex - b'a' + 10
1280 }
1281 }
1282 for chunk in split {
1283 if chunk.len() < 2 || !chunk[0].is_ascii_hexdigit() || !chunk[1].is_ascii_hexdigit() {
1284 return Err(URIUnescapeError::InvalidEscape);
1285 }
1286 let hi = hexdigit_to_byte(chunk[0]);
1287 let lo = hexdigit_to_byte(chunk[1]);
1288 buf.push((hi << 4) | lo);
1289 }
1290 Ok(Cow::Owned(buf))
1291}
1292
/// Position of the [`Components`] iterator: names the component that will be
/// looked for next.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    /// Nothing consumed yet; look for a scheme.
    Scheme,
    /// Look for `//` followed by an authority.
    Authority,
    /// Look for the leading `/` of the path.
    Root,
    /// Emit path segments.
    Path,
    /// Look for `?` followed by a query.
    Query,
    /// Look for `#` followed by a fragment.
    Fragment,
    /// Everything has been emitted.
    Finish,
}
1303
/// Iterator over the components of a URI, yielding [`Component`] values in the
/// order they appear in the text.
pub struct Components<'a> {
    /// Which component is looked for next.
    state: DecomposeState,
    /// The part of the URI that has not been consumed yet.
    uri: &'a str,
}
1308
1309impl Components<'_> {
1310 fn new(uri: &str) -> Components<'_> {
1311 Components {
1312 state: DecomposeState::Scheme,
1313 uri,
1314 }
1315 }
1316}
1317
1318impl<'a> Iterator for Components<'a> {
1319 type Item = Component<'a>;
1320
1321 fn next(&mut self) -> Option<Self::Item> {
1322 use DecomposeState::*;
1323 loop {
1324 match self.state {
1325 Scheme => {
1326 self.state = Authority;
1327 let mut bytes = self.uri.as_bytes();
1328 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1329 let len = self.uri.len() - bytes.len();
1330 let (scheme, rem) = self.uri.split_at(len);
1331 self.uri = &rem[1..];
1332 break Some(Component::Scheme(scheme));
1333 }
1334 }
1335 Authority => {
1336 self.state = Root;
1337 if let Some(rem) = self.uri.strip_prefix("//") {
1338 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1339 let (mut authority, rem) = rem.split_at(pos);
1340 self.uri = rem;
1341 let mut userinfo = None;
1342 if let Some((ui, rem)) = authority.split_once('@') {
1343 userinfo = Some(ui);
1344 authority = rem;
1345 }
1346 let mut port = None;
1347 if let Some((host, p)) = authority.rsplit_once(':')
1348 && p.bytes().all(|b| b.is_ascii_digit())
1349 {
1350 port = Some(p);
1351 authority = host;
1352 }
1353 break Some(Component::Authority {
1354 userinfo,
1355 host: authority,
1356 port,
1357 });
1358 }
1359 }
1360 Root => {
1361 self.state = Path;
1362 if let Some(rem) = self.uri.strip_prefix('/') {
1363 self.uri = rem;
1364 break Some(Component::RootSegment);
1365 }
1366 }
1367 Path => {
1368 let pos = self
1369 .uri
1370 .bytes()
1371 .position(|b| b == b'/' || b == b'?' || b == b'#')
1372 .unwrap_or(self.uri.len());
1373 let (segment, rem) = self.uri.split_at(pos);
1374 if let Some(rem) = rem.strip_prefix('/') {
1375 self.uri = rem;
1376 } else {
1377 self.uri = rem;
1378 self.state = Query;
1379 }
1380 break Some(Component::Segment(segment));
1381 }
1382 Query => {
1383 self.state = Fragment;
1384 if let Some(rem) = self.uri.strip_prefix('?') {
1385 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1386 let (query, rem) = rem.split_at(pos);
1387 self.uri = rem;
1388 break Some(Component::Query(query));
1389 }
1390 }
1391 Fragment => {
1392 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1393 self.state = Finish;
1394 if !self.uri.is_empty() {
1395 let (_, frag) = self.uri.split_at(1);
1396 self.uri = "";
1397 break Some(Component::Fragment(frag));
1398 }
1399 }
1400 Finish => break None,
1401 }
1402 }
1403 }
1404}
1405
/// A single component of a URI, as produced by [`Components`].
///
/// All variants only borrow from the URI text, so the type is cheap to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority, split into its parts.
    Authority {
        /// Text before `@`, if any.
        userinfo: Option<&'a str>,
        /// The host (may be empty).
        host: &'a str,
        /// The all-digit text after the last `:`, if any.
        port: Option<&'a str>,
    },
    /// The leading `/` of a rooted path.
    RootSegment,
    /// One path segment (may be empty).
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}