1use std::{
2 borrow::{Borrow, Cow},
3 ops::Deref,
4 path::Path,
5 rc::Rc,
6 str::{from_utf8, from_utf8_unchecked},
7 sync::Arc,
8};
9
10use crate::ParseRIError;
11
/// Borrowed, unsized URI reference; the slice counterpart of [`URIString`].
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a
/// `&str` to be reinterpreted as a `&URIStr`.
#[derive(Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIStr {
    // URI text as stored; returned verbatim by `as_escaped_str`.
    uri: str,
}
17
18impl URIStr {
19 fn new(s: &str) -> &Self {
20 unsafe {
21 &*(s as *const str as *const Self)
25 }
26 }
27
28 pub fn resolve(&self, reference: &Self) -> URIString {
31 use Component::*;
32
33 assert!(
34 self.is_absolute(),
35 "'{}' is not absolute",
36 self.as_escaped_str()
37 );
38
39 let mut ref_components = reference.components().peekable();
40 if ref_components
41 .next_if(|comp| matches!(comp, Scheme(_)))
42 .is_some()
43 {
44 let mut ret = reference.to_owned();
45 ret.normalize();
46 return ret;
47 }
48
49 if ref_components
50 .next_if(|comp| matches!(comp, Authority { .. }))
51 .is_some()
52 {
53 let mut ret = URIString {
55 uri: [self.scheme().unwrap(), ":", &reference.uri].concat(),
56 };
57 ret.normalize();
58 return ret;
59 }
60
61 let mut components = self.components().peekable();
62 let mut uri = String::new();
63 if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
64 uri.push_str(scheme);
65 uri.push(':');
66 }
67 if let Some(Authority {
68 userinfo,
69 host,
70 port,
71 }) = components.next_if(|comp| matches!(comp, Authority { .. }))
72 {
73 uri.push_str("//");
74 if let Some(userinfo) = userinfo {
75 uri.push_str(userinfo);
76 uri.push(':');
77 }
78 uri.push_str(host);
79 if let Some(port) = port {
80 uri.push(':');
81 uri.push_str(port);
82 }
83 }
84
85 if ref_components
86 .next_if(|comp| matches!(comp, RootSegment))
87 .is_some()
88 {
89 uri.push_str(&reference.uri);
90 let mut ret = URIString { uri };
91 ret.normalize();
92 return ret;
93 }
94
95 let mut segments = vec![];
96 let has_root = components
97 .next_if(|comp| matches!(comp, RootSegment))
98 .is_some();
99 let mut has_dot_segment = false;
100 while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
101 segments.push(segment);
102 has_dot_segment |= segment == "." || segment == "..";
103 }
104 if has_dot_segment {
105 segments = normalize_path_segments(segments.into_iter(), has_root);
106 }
107
108 let mut has_path = false;
109 if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
110 let mut buf = vec![segment];
111 while let Some(Segment(segment)) =
112 ref_components.next_if(|comp| matches!(comp, Segment(_)))
113 {
114 buf.push(segment);
115 }
116 if buf.len() > 1 || !buf[0].is_empty() {
117 segments.pop();
118 segments.extend(buf);
119 has_path = true;
120 }
121 }
122 build_normalized_path(segments.into_iter(), has_root, &mut uri);
123
124 if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
125 uri.push('?');
126 uri.push_str(query);
127 } else if !has_path
128 && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
129 {
130 uri.push('?');
131 uri.push_str(query);
132 }
133
134 if let Some(Fragment(fragment)) = ref_components.next() {
135 uri.push('#');
136 uri.push_str(fragment);
137 }
138
139 URIString { uri }
140 }
141
142 pub fn as_escaped_str(&self) -> &str {
143 &self.uri
144 }
145
146 pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
147 unescape(&self.uri).ok()
148 }
149
150 pub fn is_absolute(&self) -> bool {
153 self.scheme().is_some() && self.fragment().is_none()
154 }
155
156 pub fn is_relative(&self) -> bool {
159 self.scheme().is_none()
160 }
161
162 pub fn scheme(&self) -> Option<&str> {
163 let pos = self.uri.bytes().position(is_reserved)?;
164 (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
165 }
166
167 pub fn authority(&self) -> Option<&str> {
168 let rem = self
169 .uri
170 .strip_prefix("//")
171 .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
172 Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
173 }
174
175 pub fn userinfo(&self) -> Option<&str> {
176 Some(self.authority()?.split_once('@')?.0)
177 }
178
179 pub fn host(&self) -> Option<&str> {
180 let mut auth = self.authority()?;
181 if let Some((_userinfo, rem)) = auth.split_once('@') {
182 auth = rem;
183 }
184 if let Some((host, port)) = auth.rsplit_once(':')
185 && port.bytes().all(|b| b.is_ascii_digit())
186 {
187 auth = host;
188 }
189 Some(auth)
190 }
191
192 pub fn port(&self) -> Option<&str> {
193 let (_, port) = self.authority()?.rsplit_once(':')?;
194 port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
195 }
196
197 pub fn path(&self) -> &str {
198 let mut path = &self.uri;
199 if let Some(scheme) = self.scheme() {
200 path = &path[scheme.len() + 1..];
202 }
203 if let Some(rem) = path.strip_prefix("//") {
204 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
206 path = &rem[pos..]
207 }
208
209 path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
210 }
211
212 pub fn query(&self) -> Option<&str> {
213 let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
214 if self.uri.as_bytes()[pos] == b'#' {
215 return None;
216 }
217 let query = &self.uri[pos + 1..];
218 let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
219 Some(&query[..pos])
220 }
221
222 pub fn fragment(&self) -> Option<&str> {
223 let pos = self.uri.bytes().position(|b| b == b'#')?;
224 Some(&self.uri[pos + 1..])
225 }
226
227 pub fn components(&self) -> Components<'_> {
228 Components::new(&self.uri)
229 }
230}
231
232impl ToOwned for URIStr {
233 type Owned = URIString;
234
235 fn to_owned(&self) -> Self::Owned {
236 URIString {
237 uri: self.uri.to_owned(),
238 }
239 }
240}
241
242impl From<&URIStr> for URIString {
243 fn from(value: &URIStr) -> Self {
244 value.to_owned()
245 }
246}
247
248impl Clone for Box<URIStr> {
249 fn clone(&self) -> Self {
250 self.as_ref().into()
251 }
252}
253
// Implements `From<&URIStr>` for each listed single-parameter pointer type
// (`Box`, `Rc`, `Arc`): the text is first converted into `$t<str>` and the
// pointer is then reinterpreted as `$t<URIStr>`.
macro_rules! impl_boxed_convertion_uri_str {
    ($( $t:ident ),*) => {
        $(
            impl From<&URIStr> for $t<URIStr> {
                fn from(value: &URIStr) -> Self {
                    let boxed: $t<str> = value.uri.into();
                    // SAFETY: `URIStr` is `#[repr(transparent)]` over `str`.
                    // NOTE(review): this also assumes `$t<str>` and
                    // `$t<URIStr>` share a layout - confirm for `Rc`/`Arc`.
                    unsafe {
                        std::mem::transmute(boxed)
                    }
                }
            }
        )*
    };
}
impl_boxed_convertion_uri_str!(Box, Rc, Arc);
272
/// Owned URI reference; dereferences to [`URIStr`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct URIString {
    // URI text; `parse` stores it in percent-escaped form.
    uri: String,
}
288
impl URIString {
    /// Parses `uri` as a URI reference.
    ///
    /// Before validation, every character that is not an ASCII reserved or
    /// unreserved character is percent-encoded (this includes `%` itself).
    ///
    /// # Errors
    ///
    /// Returns the error reported by the grammar parser, or
    /// `ParseRIError::NotTermination` if input remains after a complete
    /// URI reference has been consumed.
    pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
        // Non-generic worker; the generic wrapper only converts the argument.
        fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
            let uri = escape_except(uri, |b| {
                b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
            });
            let mut bytes = uri.as_bytes();
            parse_uri_reference(&mut bytes)?;
            // Leftover bytes mean the text is not a complete URI reference.
            if !bytes.is_empty() {
                Err(ParseRIError::NotTermination)
            } else {
                Ok(URIString {
                    uri: uri.into_owned(),
                })
            }
        }
        _parse(uri.as_ref())
    }

    /// Builds a URI from a file system path.
    ///
    /// A trailing `/` is appended when `path` is an existing directory or is
    /// written with a trailing separator. The exact mapping is platform
    /// specific; see the per-platform workers below.
    ///
    /// # Errors
    ///
    /// Returns `ParseRIError::Unsupported` if a path component is not valid
    /// Unicode, or any error from [`URIString::parse`].
    pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
        // Unix: absolute paths get a `file://` prefix, relative paths are
        // parsed as relative references.
        #[cfg(target_family = "unix")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
            // NOTE(review): a trailing backslash is also treated as a directory
            // marker here, although it is an ordinary file name byte on Unix.
            if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            if path.is_absolute() {
                path_str.insert_str(0, "file://");
            }
            URIString::parse(path_str)
        }
        // Windows: the URI is assembled component by component so that each
        // kind of path prefix gets its own mapping.
        #[cfg(target_family = "windows")]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            use std::path::{Component::*, Prefix::*};

            let mut path_str = String::new();
            // Set for verbatim prefixes; a '/' inside a component is then
            // written as "%2F" instead of acting as a separator.
            let mut verbatim = false;
            for comp in path.components() {
                match comp {
                    Prefix(prefix) => match prefix.kind() {
                        Verbatim(root) => {
                            path_str.push_str("file:///");
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimUNC(server, root) => {
                            path_str.push_str("file://");
                            path_str.push_str(
                                &server
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            path_str.push('/');
                            path_str.push_str(
                                &root
                                    .to_str()
                                    .ok_or(ParseRIError::Unsupported)?
                                    .replace('/', "%2F"),
                            );
                            verbatim = true;
                        }
                        VerbatimDisk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                            verbatim = true;
                        }
                        DeviceNS(device) => {
                            path_str.push_str("file:///");
                            path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        UNC(server, root) => {
                            path_str.push_str("file://");
                            path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
                            path_str.push('/');
                            path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
                        }
                        Disk(letter) => {
                            path_str.push_str("file:");
                            path_str.push(letter as char);
                            path_str.push(':');
                        }
                    },
                    // The separator is written in front of each following
                    // component, so the root itself adds nothing.
                    RootDir => {}
                    CurDir => path_str.push_str("/."),
                    ParentDir => path_str.push_str("/.."),
                    Normal(segment) => {
                        path_str.push('/');
                        let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
                        if verbatim {
                            path_str.push_str(&segment.replace('/', "%2F"));
                        } else {
                            path_str.push_str(segment);
                        }
                    }
                }
            }
            // Keep a directory marker: existing directories, or paths written
            // with a trailing separator ('/' only counts when not verbatim).
            if (path.is_dir()
                || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
                    || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
                && !path_str.ends_with('/')
            {
                path_str.push('/');
            }
            URIString::parse(path_str)
        }
        // Other platforms are not implemented yet; calling this panics.
        #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
            todo!()
        }
        _parse_file_path(path.as_ref())
    }

    /// Converts this URI into a boxed [`URIStr`] by copying its text.
    pub fn into_boxed_uri_str(self) -> Box<URIStr> {
        Box::from(self.as_ref())
    }

    /// Normalizes the URI in place: the scheme is lowercased and `.` / `..`
    /// path segments are resolved. All other components are copied unchanged.
    pub fn normalize(&mut self) {
        use Component::*;

        let mut uri = String::with_capacity(self.uri.len());
        let mut paths = vec![];
        let mut query = None;
        let mut fragment = None;
        let mut has_root = false;
        for comp in self.components() {
            match comp {
                Scheme(scheme) => {
                    uri.push_str(&scheme.to_ascii_lowercase());
                    uri.push(':');
                }
                Authority {
                    userinfo,
                    host,
                    port,
                } => {
                    uri.push_str("//");
                    if let Some(userinfo) = userinfo {
                        uri.push_str(userinfo);
                        uri.push('@');
                    }
                    uri.push_str(host);
                    if let Some(port) = port {
                        uri.push(':');
                        uri.push_str(port);
                    }
                }
                // Path pieces are collected first and written after the loop,
                // once all segments are known.
                RootSegment => has_root = true,
                Segment(segment) => paths.push(segment),
                Query(q) => query = Some(q),
                Fragment(f) => fragment = Some(f),
            }
        }
        build_normalized_path(paths.into_iter(), has_root, &mut uri);
        if let Some(query) = query {
            uri.push('?');
            uri.push_str(query);
        }
        if let Some(fragment) = fragment {
            uri.push('#');
            uri.push_str(fragment);
        }
        self.uri = uri;
    }
}
469
470impl AsRef<URIStr> for URIString {
471 fn as_ref(&self) -> &URIStr {
472 URIStr::new(&self.uri)
473 }
474}
475
476impl Borrow<URIStr> for URIString {
477 fn borrow(&self) -> &URIStr {
478 self.as_ref()
479 }
480}
481
482impl Deref for URIString {
483 type Target = URIStr;
484
485 fn deref(&self) -> &Self::Target {
486 self.as_ref()
487 }
488}
489
490macro_rules! impl_convertion_uri_string {
491 ($( $t:ty ),*) => {
492 $(
493 impl From<URIString> for $t {
494 fn from(value: URIString) -> $t {
495 From::from(value.as_ref())
496 }
497 }
498 )*
499 };
500}
501impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
502
503fn build_normalized_path<'a>(
504 segments: impl Iterator<Item = &'a str>,
505 has_root: bool,
506 buffer: &mut String,
507) {
508 let segments = normalize_path_segments(segments, has_root);
509 if has_root {
510 buffer.push('/');
511 }
512 for (i, seg) in segments.into_iter().enumerate() {
513 if i > 0 {
514 buffer.push('/');
515 }
516 buffer.push_str(seg);
517 }
518}
519
/// Resolves `.` and `..` segments.
///
/// `..` removes the previous ordinary segment. When nothing can be removed
/// it is kept for relative paths (`has_root == false`) and dropped for rooted
/// ones. If the last segment was `.` or `..`, an empty segment is appended so
/// the rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut resolved: Vec<&'a str> = Vec::new();
    let mut ends_with_dot = false;
    for segment in segments {
        match segment {
            "." => ends_with_dot = true,
            ".." => {
                let removable = matches!(resolved.last(), Some(&top) if top != "..");
                if removable {
                    resolved.pop();
                } else if !has_root {
                    resolved.push(segment);
                }
                ends_with_dot = true;
            }
            ordinary => {
                resolved.push(ordinary);
                ends_with_dot = false;
            }
        }
    }

    if ends_with_dot {
        resolved.push("");
    }

    resolved
}
549
550fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
557 if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
558 parse_relative_ref(b)
561 } else {
562 if !b[0].is_ascii_alphabetic() {
566 parse_relative_ref(b)
569 } else {
570 if let Some(&c) = b
574 .iter()
575 .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
576 && c == b':'
577 {
578 parse_uri(b)
579 } else {
580 parse_relative_ref(b)
581 }
582 }
583 }
584}
585
586fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
593 parse_scheme(b)?;
594 *b = b
595 .strip_prefix(b":")
596 .ok_or(ParseRIError::InvalidSchemeSeparator)?;
597 parse_hier_part(b)?;
598 if let Some(query) = b.strip_prefix(b"?") {
599 *b = query;
600 parse_query(b)?;
601 }
602 if let Some(fragment) = b.strip_prefix(b"#") {
603 *b = fragment;
604 parse_fragment(b)?;
605 }
606 Ok(())
607}
608
609fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
616 if b.is_empty() || !b[0].is_ascii_alphabetic() {
617 return Err(ParseRIError::InvalidScheme);
618 }
619 let pos = b
620 .iter()
621 .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
622 .unwrap_or(b.len());
623 *b = &b[pos..];
624 Ok(())
625}
626
627fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
637 if let Some(rem) = b.strip_prefix(b"/") {
638 if let Some(rem) = rem.strip_prefix(b"/") {
641 *b = rem;
645 parse_authority(b)?;
646 parse_path_abempty(b)
647 } else {
648 parse_path_absolute(b)
651 }
652 } else {
653 let mut dum = *b;
655 if parse_pchar(&mut dum).is_ok() {
656 parse_path_rootless(b)
658 } else {
659 Ok(())
663 }
664 }
665}
666
667fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
674 if b.starts_with(b"[") {
675 parse_ip_literal(b)?;
677 if let Some(rem) = b.strip_prefix(b":") {
678 *b = rem;
679 parse_port(b)?;
680 }
681 return Ok(());
682 }
683
684 let mut colon = usize::MAX;
710 let mut now = 0;
711 let mut t = *b;
712 while !t.is_empty() {
713 let pos = t
714 .iter()
715 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
716 .unwrap_or(t.len());
717 t = &t[pos..];
718 now += pos;
719 if let Some(rem) = t.strip_prefix(b":") {
720 now += 1;
721 t = rem;
722 colon = colon.min(now);
723 } else {
724 break;
725 }
726 }
727
728 debug_assert_eq!(now, b.len() - t.len());
729
730 if let Some(rem) = t.strip_prefix(b"@") {
731 *b = rem;
732 parse_host(b)?;
733 if let Some(rem) = b.strip_prefix(b":") {
734 *b = rem;
735 parse_port(b)?;
736 }
737 Ok(())
738 } else if t.starts_with(b"[") {
739 Err(ParseRIError::InvalidAuthority)
740 } else if colon < usize::MAX {
741 *b = &b[colon + 1..];
742 parse_port(b)
743 } else {
744 *b = t;
745 Ok(())
746 }
747}
748
749fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
767 if b.starts_with(b"[") {
768 parse_ip_literal(b)
769 } else {
770 parse_reg_name(b)
772 }
773}
774
775fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
782 *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
783 if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
784 parse_ipv_future(b)?;
785 } else {
786 parse_ipv6_address(b)?;
787 }
788 *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
789 Ok(())
790}
791
792fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
799 if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
800 return Err(ParseRIError::InvalidIPvFuture);
801 }
802 *b = &b[1..];
803 let pos = b
804 .iter()
805 .position(|&b| !b.is_ascii_hexdigit())
806 .unwrap_or(b.len());
807 if !(1..=b.len() - 2).contains(&pos) {
808 return Err(ParseRIError::InvalidIPvFuture);
809 }
810 *b = &b[pos..];
811 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
812 let pos = b
813 .iter()
814 .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
815 .unwrap_or(b.len());
816 if pos == 0 {
817 return Err(ParseRIError::InvalidIPvFuture);
818 }
819 *b = &b[pos..];
820 Ok(())
821}
822
823fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
842 let mut cnt = 1;
843 let mut omit = false;
844 if let Some(rem) = b.strip_prefix(b":") {
845 *b = rem;
846 omit = true;
847 } else {
848 parse_h16(b)?;
849 }
850
851 while cnt + (omit as i32) < 8
852 && let Some(rem) = b.strip_prefix(b":")
853 {
854 *b = rem;
855 if b.starts_with(b":") {
856 if omit {
857 return Err(ParseRIError::InvalidIPv6address);
858 }
859 omit = true;
860 cnt += 1;
861 continue;
862 }
863
864 let mut dum = *b;
872 if parse_ipv4_address(&mut dum).is_ok() {
873 *b = dum;
874 cnt += 2;
876 break;
878 } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
879 parse_h16(b)?;
880 }
881 }
882
883 if (omit && cnt <= 8) || (!omit && cnt == 8) {
886 Ok(())
887 } else {
888 Err(ParseRIError::InvalidIPv6address)
889 }
890}
891
892fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
900 let pos = b
901 .iter()
902 .position(|&b| !b.is_ascii_hexdigit())
903 .unwrap_or(b.len());
904 if pos == 0 {
905 Err(ParseRIError::InvalidH16)
906 } else {
907 *b = &b[pos.min(4)..];
908 Ok(())
909 }
910}
911
912fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
924 parse_dec_octet(b)?;
925 for _ in 0..3 {
926 *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
927 parse_dec_octet(b)?;
928 }
929 Ok(())
930}
931fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
932 let len = match b {
933 [b'2', b'5', b'0'..=b'5', ..] => 3,
934 [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
935 [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
936 [b'1'..=b'9', b'0'..=b'9', ..] => 2,
937 [b'0'..=b'9', ..] => 1,
938 _ => return Err(ParseRIError::InvalidDecOctet),
939 };
940 *b = &b[len..];
941 Ok(())
942}
943
944fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
951 while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
954 Ok(())
955}
956
957fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
964 let pos = b
965 .iter()
966 .position(|&b| !b.is_ascii_digit())
967 .unwrap_or(b.len());
968 *b = &b[pos..];
969 Ok(())
970}
971
972fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
979 while let Some(rem) = b.strip_prefix(b"/") {
980 *b = rem;
981 parse_segment(b)?;
982 }
983 Ok(())
984}
985
986fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
993 *b = b
994 .strip_prefix(b"/")
995 .ok_or(ParseRIError::InvalidPathAbsolute)?;
996 if parse_segment_nz(b).is_ok() {
997 while let Some(rem) = b.strip_prefix(b"/") {
998 *b = rem;
999 parse_segment(b)?;
1000 }
1001 }
1002 Ok(())
1003}
1004
1005fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1012 parse_segment_nz_nc(b)?;
1013 while let Some(rem) = b.strip_prefix(b"/") {
1014 *b = rem;
1015 parse_segment(b)?;
1016 }
1017 Ok(())
1018}
1019
1020fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1027 parse_segment_nz(b)?;
1028 while let Some(rem) = b.strip_prefix(b"/") {
1029 *b = rem;
1030 parse_segment(b)?;
1031 }
1032 Ok(())
1033}
1034
1035fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1053 while parse_pchar(b).is_ok() {}
1054 Ok(())
1055}
1056
1057fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1064 parse_pchar(b)?;
1065 while parse_pchar(b).is_ok() {}
1066 Ok(())
1067}
1068
1069fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1077 if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1078 return Err(ParseRIError::InvalidSegmentNzNc);
1079 }
1080 while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1081 Ok(())
1082}
1083
1084fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1091 if b.is_empty() {
1092 return Err(ParseRIError::InvalidPChar);
1093 }
1094
1095 if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1096 *b = &b[1..];
1097 Ok(())
1098 } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1099 *b = &b[3..];
1100 Ok(())
1101 } else {
1102 Err(ParseRIError::InvalidPChar)
1103 }
1104}
1105
1106fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1113 loop {
1114 if let Some(rem) = b.strip_prefix(b"/") {
1115 *b = rem;
1116 } else if let Some(rem) = b.strip_prefix(b"?") {
1117 *b = rem;
1118 } else if parse_pchar(b).is_ok() {
1119 } else {
1121 break Ok(());
1122 }
1123 }
1124}
1125
1126fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1133 loop {
1134 if let Some(rem) = b.strip_prefix(b"/") {
1135 *b = rem;
1136 } else if let Some(rem) = b.strip_prefix(b"?") {
1137 *b = rem;
1138 } else if parse_pchar(b).is_ok() {
1139 } else {
1141 break Ok(());
1142 }
1143 }
1144}
1145
1146fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1153 parse_relative_part(b)?;
1154 if let Some(query) = b.strip_prefix(b"?") {
1155 *b = query;
1156 parse_query(b)?;
1157 }
1158 if let Some(fragment) = b.strip_prefix(b"#") {
1159 *b = fragment;
1160 parse_fragment(b)?;
1161 }
1162 Ok(())
1163}
1164
1165fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1175 if let Some(rem) = b.strip_prefix(b"/") {
1176 if let Some(rem) = rem.strip_prefix(b"/") {
1177 *b = rem;
1178 parse_authority(b)?;
1179 parse_path_abempty(b)
1180 } else {
1181 parse_path_absolute(b)
1182 }
1183 } else {
1184 let orig = b.len();
1185 let ret = parse_path_noscheme(b);
1186 if orig == b.len() { Ok(()) } else { ret }
1188 }
1189}
1190
1191fn is_reserved(b: u8) -> bool {
1198 is_gen_delims(b) || is_sub_delims(b)
1199}
1200
/// Returns `true` for the general delimiters `: / ? # [ ] @`.
fn is_gen_delims(b: u8) -> bool {
    b":/?#[]@".contains(&b)
}
1210
/// Returns `true` for the sub-delimiters `! $ & ' ( ) * + , ; =`.
fn is_sub_delims(b: u8) -> bool {
    b"!$&'()*+,;=".contains(&b)
}
1223
/// Returns `true` for unreserved bytes: ASCII letters, digits, `-`, `.`,
/// `_` and `~`.
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1233
// Percent-encoding lookup table: entry `i` occupies bytes `3 * i .. 3 * i + 3`
// and holds `%` followed by the two uppercase hex digits of `i`.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut table = [0u8; 256 * 3];
    let mut byte = 0usize;
    while byte < 256 {
        let base = byte * 3;
        table[base] = b'%';
        table[base + 1] = HEX_DIGITS[byte >> 4];
        table[base + 2] = HEX_DIGITS[byte & 0xF];
        byte += 1;
    }
    table
};
// String view of `LUT_BYTES`, so that three-byte escapes can be sliced out as
// `&str`.
const LUT: &str = unsafe {
    // SAFETY: `LUT_BYTES` holds only `%` and ASCII hex digits, which is valid
    // UTF-8.
    from_utf8_unchecked(&LUT_BYTES)
};
1256
1257pub fn escape(s: &str) -> Cow<'_, str> {
1258 escape_except(s, |_| false)
1259}
1260
1261pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1262 escape_bytes_except(b, |_| false)
1263}
1264
1265pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1266 let cap = s
1267 .chars()
1268 .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1269 .sum::<usize>();
1270 if cap == 0 {
1271 return Cow::Borrowed(s);
1272 }
1273 let mut encode = [0; 6];
1274 let mut buf = String::with_capacity(s.len() + cap);
1275 for c in s.chars() {
1276 if is_except(c) {
1277 buf.push(c);
1278 } else {
1279 let encoded = c.encode_utf8(&mut encode);
1280 for b in encoded.bytes() {
1281 let index = b as usize * 3;
1282 buf.push_str(&LUT[index..index + 3]);
1283 }
1284 }
1285 }
1286 Cow::Owned(buf)
1287}
1288
1289pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1290 let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1291 if cap == 0 {
1292 return Cow::Borrowed(b);
1293 }
1294 let mut buf = Vec::with_capacity(b.len() + cap);
1295 for &b in b {
1296 if is_except(b) {
1297 buf.push(b);
1298 } else {
1299 let index = b as usize * 3;
1300 buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1301 }
1302 }
1303 Cow::Owned(buf)
1304}
1305
/// Error returned by [`unescape`] and [`unescape_bytes`].
pub enum URIUnescapeError {
    /// A `%` was not followed by two hexadecimal digits.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}

impl From<std::str::Utf8Error> for URIUnescapeError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8Error(value)
    }
}

/// Decodes the two hex digits at the start of `chunk` (the bytes that follow
/// a `%`) into one byte.
///
/// Only ASCII hex digits are accepted; input that is too short or contains
/// anything else (signs, non-ASCII bytes) is `InvalidEscape`.
fn decode_hex_pair(chunk: &[u8]) -> Result<u8, URIUnescapeError> {
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            _ => None,
        }
    }

    match chunk {
        [hi, lo, ..] => match (hex_value(*hi), hex_value(*lo)) {
            (Some(hi), Some(lo)) => Ok((hi << 4) | lo),
            _ => Err(URIUnescapeError::InvalidEscape),
        },
        _ => Err(URIUnescapeError::InvalidEscape),
    }
}

/// Decodes the percent-escapes in `s`.
///
/// Consecutive escapes are collected and decoded together as one UTF-8
/// sequence. Returns the input borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns `InvalidEscape` when a `%` is not followed by two hex digits and
/// `Utf8Error` when a run of decoded bytes is not valid UTF-8.
pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
    if !s.contains('%') {
        return Ok(Cow::Borrowed(s));
    }

    let mut split = s.split('%');
    let mut buf = String::with_capacity(s.len());
    // `split` always yields the text before the first '%'.
    buf.push_str(split.next().unwrap_or(""));
    // Bytes of the current run of adjacent escapes.
    let mut bytes = vec![];
    for chunk in split {
        bytes.push(decode_hex_pair(chunk.as_bytes())?);

        if chunk.len() > 2 {
            // Literal text follows, so the run of escapes is complete. Both
            // digits are ASCII, so index 2 is a character boundary.
            buf.push_str(from_utf8(&bytes)?);
            buf.push_str(&chunk[2..]);
            bytes.clear();
        }
    }

    if !bytes.is_empty() {
        buf.push_str(from_utf8(&bytes)?);
    }
    Ok(Cow::Owned(buf))
}

/// Decodes the percent-escapes in `b`.
///
/// Returns the input borrowed when it contains no `%`.
///
/// # Errors
///
/// Returns `InvalidEscape` when a `%` is not followed by two hex digits.
pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
    if !b.contains(&b'%') {
        return Ok(Cow::Borrowed(b));
    }

    let mut split = b.split(|&b| b == b'%');
    let mut buf = Vec::with_capacity(b.len());
    // `split` always yields the bytes before the first '%'.
    buf.extend_from_slice(split.next().unwrap_or(&[]));

    for chunk in split {
        buf.push(decode_hex_pair(chunk)?);
        // Keep the literal bytes that follow the escape.
        buf.extend_from_slice(&chunk[2..]);
    }
    Ok(Cow::Owned(buf))
}
1375
/// Position of the [`Components`] iterator: each variant names the component
/// the iterator will look for next.
#[derive(Debug, Clone, Copy)]
enum DecomposeState {
    Scheme,
    Authority,
    Root,
    Path,
    Query,
    Fragment,
    // Nothing left; the iterator returns `None` from here on.
    Finish,
}
1386
/// Iterator over the [`Component`]s of a URI, in order of appearance.
pub struct Components<'a> {
    // Which component to look for next.
    state: DecomposeState,
    // The part of the URI that has not been consumed yet.
    uri: &'a str,
}
1391
1392impl Components<'_> {
1393 fn new(uri: &str) -> Components<'_> {
1394 Components {
1395 state: DecomposeState::Scheme,
1396 uri,
1397 }
1398 }
1399}
1400
1401impl<'a> Iterator for Components<'a> {
1402 type Item = Component<'a>;
1403
1404 fn next(&mut self) -> Option<Self::Item> {
1405 use DecomposeState::*;
1406 loop {
1407 match self.state {
1408 Scheme => {
1409 self.state = Authority;
1410 let mut bytes = self.uri.as_bytes();
1411 if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1412 let len = self.uri.len() - bytes.len();
1413 let (scheme, rem) = self.uri.split_at(len);
1414 self.uri = &rem[1..];
1415 break Some(Component::Scheme(scheme));
1416 }
1417 }
1418 Authority => {
1419 self.state = Root;
1420 if let Some(rem) = self.uri.strip_prefix("//") {
1421 let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1422 let (mut authority, rem) = rem.split_at(pos);
1423 self.uri = rem;
1424 let mut userinfo = None;
1425 if let Some((ui, rem)) = authority.split_once('@') {
1426 userinfo = Some(ui);
1427 authority = rem;
1428 }
1429 let mut port = None;
1430 if let Some((host, p)) = authority.rsplit_once(':')
1431 && p.bytes().all(|b| b.is_ascii_digit())
1432 {
1433 port = Some(p);
1434 authority = host;
1435 }
1436 break Some(Component::Authority {
1437 userinfo,
1438 host: authority,
1439 port,
1440 });
1441 }
1442 }
1443 Root => {
1444 self.state = Path;
1445 if let Some(rem) = self.uri.strip_prefix('/') {
1446 self.uri = rem;
1447 break Some(Component::RootSegment);
1448 }
1449 }
1450 Path => {
1451 let pos = self
1452 .uri
1453 .bytes()
1454 .position(|b| b == b'/' || b == b'?' || b == b'#')
1455 .unwrap_or(self.uri.len());
1456 let (segment, rem) = self.uri.split_at(pos);
1457 if let Some(rem) = rem.strip_prefix('/') {
1458 self.uri = rem;
1459 } else {
1460 self.uri = rem;
1461 self.state = Query;
1462 }
1463 break Some(Component::Segment(segment));
1464 }
1465 Query => {
1466 self.state = Fragment;
1467 if let Some(rem) = self.uri.strip_prefix('?') {
1468 let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1469 let (query, rem) = rem.split_at(pos);
1470 self.uri = rem;
1471 break Some(Component::Query(query));
1472 }
1473 }
1474 Fragment => {
1475 debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1476 self.state = Finish;
1477 if !self.uri.is_empty() {
1478 let (_, frag) = self.uri.split_at(1);
1479 self.uri = "";
1480 break Some(Component::Fragment(frag));
1481 }
1482 }
1483 Finish => break None,
1484 }
1485 }
1486 }
1487}
1488
/// One component of a URI, as yielded by [`Components`].
pub enum Component<'a> {
    /// Scheme name, without the trailing `:`.
    Scheme(&'a str),
    /// Authority (the part after `//`), split into its pieces.
    Authority {
        /// Text before `@`, if present.
        userinfo: Option<&'a str>,
        /// Host, without userinfo and port.
        host: &'a str,
        /// All-digit text after the last `:`, if present (may be empty).
        port: Option<&'a str>,
    },
    /// The leading `/` of an absolute path.
    RootSegment,
    /// One path segment, without separators (may be empty).
    Segment(&'a str),
    /// Query, without the leading `?`.
    Query(&'a str),
    /// Fragment, without the leading `#`.
    Fragment(&'a str),
}