1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3pub mod query;
6
7use std::{
8 borrow::Cow,
9 convert::TryFrom,
10 error::Error,
11 fmt::{self, Display, Formatter, Write},
12 str::FromStr,
13 sync::Arc,
14};
15
16use percent_encoding::{NON_ALPHANUMERIC, PercentEncode, percent_decode_str, percent_encode};
17
18pub use self::query::QueryDict;
19
/// Error returned when a string cannot be parsed as a URL.
///
/// The type is a small `Copy` value that can be compared and hashed, so
/// callers can write e.g. `assert_eq!(err, UrlParseError::InvalidPort)`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum UrlParseError {
    /// The input does not have the expected `scheme://...` shape.
    InvalidUrl,
    /// The port part of the authority is not a valid 16-bit port number.
    InvalidPort,
}
28
29impl Display for UrlParseError {
30 fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
31 let msg = match *self {
32 Self::InvalidUrl => "invalid URL",
33 Self::InvalidPort => "invalid port",
34 };
35
36 f.write_str(msg)
37 }
38}
39
// The default `Error` methods are used as they are; the error does not wrap
// any underlying source error.
impl Error for UrlParseError {}
41
/// Parsed URL.
///
/// The parsed representation is kept behind an `Arc`, so cloning a `Url`
/// clones the `Arc` handle rather than the underlying string.
#[derive(Clone)]
pub struct Url {
    // Shared, immutable parsed representation of the URL.
    inner: Arc<InnerUrl>,
}
47
48impl Url {
49 #[inline]
51 pub fn scheme(&self) -> &str {
52 self.inner.scheme()
53 }
54
55 #[inline]
57 pub fn auth(&self) -> Option<&str> {
58 self.inner.auth()
59 }
60
61 #[inline]
63 pub fn username(&self) -> Option<&str> {
64 self.inner.username()
65 }
66
67 #[inline]
69 pub fn password(&self) -> Option<&str> {
70 self.inner.password()
71 }
72
73 #[inline]
75 pub fn netloc(&self) -> &str {
76 self.inner.netloc()
77 }
78
79 #[inline]
81 pub fn host(&self) -> &str {
82 self.inner.host()
83 }
84
85 #[inline]
87 pub fn port(&self) -> Option<u16> {
88 self.inner.port()
89 }
90
91 #[inline]
93 pub fn path(&self) -> &str {
94 self.inner.path()
95 }
96
97 #[inline]
99 pub fn path_with_query(&self) -> &str {
100 self.inner.path_with_query()
101 }
102
103 #[inline]
105 pub fn path_with_query_and_fragment(&self) -> &str {
106 self.inner.path_with_query_and_fragment()
107 }
108
109 #[inline]
111 pub fn query(&self) -> Option<&str> {
112 self.inner.query()
113 }
114
115 #[inline]
117 pub fn fragment(&self) -> Option<&str> {
118 self.inner.fragment()
119 }
120
121 pub fn base_url(&self) -> Self {
123 Self {
124 inner: Arc::new(self.inner.base_url()),
125 }
126 }
127
128 pub fn with_auth(&self, auth: Option<&str>) -> Result<Self, UrlParseError> {
132 let scheme = self.inner.scheme();
133 let netloc = self.inner.netloc();
134 let path = self.inner.path_with_query_and_fragment();
135
136 let url = if let Some(auth) = auth {
137 format!("{scheme}://{auth}@{netloc}{path}")
138 } else {
139 format!("{scheme}://{netloc}{path}")
140 };
141
142 Url::try_from(url)
143 }
144
145 pub fn with_credentials(&self, username: &str, password: &str) -> Result<Self, UrlParseError> {
149 let scheme = self.inner.scheme();
150 let netloc = self.inner.netloc();
151 let path = self.inner.path_with_query_and_fragment();
152
153 Url::try_from(format!("{scheme}://{username}:{password}@{netloc}{path}"))
154 }
155
156 pub fn with_netloc(&self, netloc: &str) -> Result<Self, UrlParseError> {
158 let scheme = self.inner.scheme();
159 let path = self.inner.path_with_query_and_fragment();
160
161 let url = if let Some(auth) = self.inner.auth() {
162 format!("{scheme}://{auth}@{netloc}{path}")
163 } else {
164 format!("{scheme}://{netloc}{path}")
165 };
166
167 Url::try_from(url)
168 }
169
170 pub fn with_query(&self, query: Option<&str>) -> Result<Self, UrlParseError> {
174 let base_url = self.inner.base_url_str();
175 let path = self.inner.path();
176
177 let mut url = format!("{base_url}{path}");
178
179 if let Some(query) = query {
180 let _ = write!(url, "?{query}");
181 }
182
183 if let Some(fragment) = self.inner.fragment() {
184 let _ = write!(url, "#{fragment}");
185 }
186
187 Url::try_from(url)
188 }
189
190 pub fn with_fragment(&self, fragment: Option<&str>) -> Result<Self, UrlParseError> {
194 let base_url = self.inner.base_url_str();
195 let path = self.inner.path();
196
197 let mut url = format!("{base_url}{path}");
198
199 if let Some(query) = self.inner.query() {
200 let _ = write!(url, "?{query}");
201 }
202
203 if let Some(fragment) = fragment {
204 let _ = write!(url, "#{fragment}");
205 }
206
207 Url::try_from(url)
208 }
209
210 pub fn join(&self, input: &str) -> Result<Self, UrlParseError> {
222 let scheme = self.inner.scheme();
223 let base_url = self.inner.base_url_str();
224
225 let input = input.trim();
226
227 if input.is_empty() {
228 Ok(self.clone())
229 } else if input.starts_with("//") {
230 Url::try_from(format!("{scheme}:{input}"))
231 } else if input.starts_with('/') {
232 Url::try_from(format!("{base_url}{input}"))
233 } else if is_absolute_url(input) {
234 Url::try_from(input.to_string())
235 } else {
236 let current_path = self.path();
237
238 let rightmost_separator = current_path
239 .rfind('/')
240 .expect("the path should contain at least one path separator");
241
242 let base_path = ¤t_path[..rightmost_separator];
243
244 Url::try_from(format!("{base_url}{base_path}/{input}"))
245 }
246 }
247}
248
249impl AsRef<str> for Url {
250 #[inline]
251 fn as_ref(&self) -> &str {
252 &self.inner.serialized
253 }
254}
255
256impl Display for Url {
257 #[inline]
258 fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
259 f.write_str(&self.inner.serialized)
260 }
261}
262
263impl FromStr for Url {
264 type Err = UrlParseError;
265
266 #[inline]
267 fn from_str(s: &str) -> Result<Url, UrlParseError> {
268 s.into_url()
269 }
270}
271
272impl TryFrom<String> for Url {
273 type Error = UrlParseError;
274
275 #[inline]
276 fn try_from(s: String) -> Result<Url, UrlParseError> {
277 s.into_url()
278 }
279}
280
281impl From<Url> for String {
282 fn from(url: Url) -> Self {
283 match Arc::try_unwrap(url.inner) {
284 Ok(inner) => inner.serialized,
285 Err(inner) => inner.serialized.clone(),
286 }
287 }
288}
289
/// Conversion of a value into a [`Url`].
///
/// This file implements the trait for `String`, `&String`, `&str`, `Url` and
/// `&Url`.
pub trait IntoUrl {
    /// Consume `self` and try to turn it into a [`Url`].
    fn into_url(self) -> Result<Url, UrlParseError>;
}
295
296impl IntoUrl for String {
297 #[inline(never)]
298 fn into_url(self) -> Result<Url, UrlParseError> {
299 let mut inner = InnerUrl {
300 serialized: self,
301 hierarchy_start: 0,
302 netloc_start: 0,
303 username_start: None,
304 password_start: None,
305 port_start: None,
306 path_start: None,
307 query_start: None,
308 fragment_start: None,
309 port: None,
310 };
311
312 inner.init()?;
313
314 let res = Url {
315 inner: Arc::new(inner),
316 };
317
318 Ok(res)
319 }
320}
321
322impl IntoUrl for &String {
323 #[inline]
324 fn into_url(self) -> Result<Url, UrlParseError> {
325 IntoUrl::into_url(String::from(self))
326 }
327}
328
329impl IntoUrl for &str {
330 #[inline]
331 fn into_url(self) -> Result<Url, UrlParseError> {
332 IntoUrl::into_url(String::from(self))
333 }
334}
335
336impl IntoUrl for Url {
337 #[inline]
338 fn into_url(self) -> Result<Url, UrlParseError> {
339 Ok(self)
340 }
341}
342
343impl IntoUrl for &Url {
344 #[inline]
345 fn into_url(self) -> Result<Url, UrlParseError> {
346 Ok(self.clone())
347 }
348}
349
// Parsed URL representation: the serialized string plus byte offsets of the
// individual URL components within that string.
struct InnerUrl {
    // The whole URL as a string; all the offsets below index into it.
    serialized: String,
    // Index right behind the `:` that terminates the scheme.
    hierarchy_start: usize,
    // Index of the first character of the network location (host[:port]).
    netloc_start: usize,
    // Start of the user info; set only if the authority contains `@`.
    username_start: Option<usize>,
    // Index right behind the `:` within the user info (if there is any).
    password_start: Option<usize>,
    // Index right behind the `:` that separates the host and the port.
    port_start: Option<usize>,
    // Index of the `/` where the path starts (if there is any path).
    path_start: Option<usize>,
    // Index right behind the `?` that starts the query.
    query_start: Option<usize>,
    // Index right behind the `#` that starts the fragment.
    fragment_start: Option<usize>,
    // Parsed port number (if the URL contains a port).
    port: Option<u16>,
}
363
364impl InnerUrl {
365 fn init(&mut self) -> Result<(), UrlParseError> {
367 if let Some(pos) = self.serialized.find(':') {
368 if !is_valid_scheme(&self.serialized[..pos]) {
369 return Err(UrlParseError::InvalidUrl);
370 }
371
372 self.process_hierarchy(pos + 1)
373 } else {
374 Err(UrlParseError::InvalidUrl)
375 }
376 }
377
378 fn process_hierarchy(&mut self, hierarchy_start: usize) -> Result<(), UrlParseError> {
380 self.hierarchy_start = hierarchy_start;
381
382 let suffix = &self.serialized[hierarchy_start..];
383
384 if !suffix.starts_with("//") {
385 return Err(UrlParseError::InvalidUrl);
386 }
387
388 let authority_start = hierarchy_start + 2;
389
390 let suffix = &self.serialized[authority_start..];
391
392 if let Some(pos) = suffix.find('/') {
393 let path_start = authority_start + pos;
394
395 self.process_authority(authority_start, path_start)?;
396 self.process_path(path_start);
397 } else {
398 self.process_authority(authority_start, self.serialized.len())?;
399 }
400
401 Ok(())
402 }
403
404 fn process_authority(
406 &mut self,
407 authority_start: usize,
408 authority_end: usize,
409 ) -> Result<(), UrlParseError> {
410 let authority = &self.serialized[authority_start..authority_end];
411
412 if let Some(pos) = authority.rfind('@') {
413 let user_info_end = authority_start + pos;
414
415 self.process_user_info(authority_start, user_info_end);
416
417 self.netloc_start = authority_start + pos + 1;
418 } else {
419 self.netloc_start = authority_start;
420 }
421
422 let netloc = &self.serialized[self.netloc_start..authority_end];
423
424 if !netloc.ends_with(']') {
425 if let Some(pos) = netloc.rfind(':') {
426 let port_start = pos + 1;
427
428 let port =
429 u16::from_str(&netloc[port_start..]).map_err(|_| UrlParseError::InvalidPort)?;
430
431 self.port_start = Some(self.netloc_start + port_start);
432 self.port = Some(port);
433 }
434 }
435
436 Ok(())
437 }
438
439 fn process_user_info(&mut self, user_info_start: usize, user_info_end: usize) {
441 self.username_start = Some(user_info_start);
442
443 let user_info = &self.serialized[user_info_start..user_info_end];
444
445 if let Some(pos) = user_info.find(':') {
446 self.password_start = Some(user_info_start + pos + 1);
447 }
448 }
449
450 fn process_path(&mut self, path_start: usize) {
452 self.path_start = Some(path_start);
453
454 let suffix = &self.serialized[path_start..];
455
456 if let Some(pos) = suffix.find('#') {
457 self.fragment_start = Some(path_start + pos + 1);
458 }
459
460 let path_or_query_end = self
461 .fragment_start
462 .map(|pos| pos - 1)
463 .unwrap_or(self.serialized.len());
464
465 let path_with_query = &self.serialized[path_start..path_or_query_end];
466
467 if let Some(pos) = path_with_query.find('?') {
468 self.query_start = Some(path_start + pos + 1);
469 }
470 }
471
472 #[inline]
474 fn scheme(&self) -> &str {
475 &self.serialized[..self.hierarchy_start - 1]
476 }
477
478 fn auth(&self) -> Option<&str> {
480 let start = self.username_start?;
481 let end = self.netloc_start - 1;
482
483 Some(&self.serialized[start..end])
484 }
485
486 fn username(&self) -> Option<&str> {
488 let start = self.username_start?;
489
490 let end = self.password_start.unwrap_or(self.netloc_start) - 1;
491
492 Some(&self.serialized[start..end])
493 }
494
495 fn password(&self) -> Option<&str> {
497 self.password_start
498 .map(|start| &self.serialized[start..self.netloc_start - 1])
499 }
500
501 fn netloc(&self) -> &str {
503 let end = self.path_start.unwrap_or(self.serialized.len());
504
505 &self.serialized[self.netloc_start..end]
506 }
507
508 fn host(&self) -> &str {
510 let end = self
511 .port_start
512 .map(|pos| pos - 1)
513 .or(self.path_start)
514 .unwrap_or(self.serialized.len());
515
516 &self.serialized[self.netloc_start..end]
517 }
518
519 #[inline]
521 fn port(&self) -> Option<u16> {
522 self.port
523 }
524
525 fn path(&self) -> &str {
527 if let Some(start) = self.path_start {
528 let end = self
529 .query_start
530 .or(self.fragment_start)
531 .map(|pos| pos - 1)
532 .unwrap_or(self.serialized.len());
533
534 &self.serialized[start..end]
535 } else {
536 "/"
537 }
538 }
539
540 fn query(&self) -> Option<&str> {
542 let start = self.query_start?;
543
544 let end = self
545 .fragment_start
546 .map(|pos| pos - 1)
547 .unwrap_or(self.serialized.len());
548
549 Some(&self.serialized[start..end])
550 }
551
552 fn path_with_query(&self) -> &str {
554 if let Some(start) = self.path_start {
555 let end = self
556 .fragment_start
557 .map(|idx| idx - 1)
558 .unwrap_or(self.serialized.len());
559
560 &self.serialized[start..end]
561 } else {
562 "/"
563 }
564 }
565
566 fn path_with_query_and_fragment(&self) -> &str {
568 if let Some(start) = self.path_start {
569 &self.serialized[start..]
570 } else {
571 "/"
572 }
573 }
574
575 fn fragment(&self) -> Option<&str> {
577 self.fragment_start.map(|start| &self.serialized[start..])
578 }
579
580 fn base_url_str(&self) -> &str {
585 let end = self.path_start.unwrap_or(self.serialized.len());
586
587 &self.serialized[..end]
588 }
589
590 fn base_url(&self) -> Self {
593 Self {
594 serialized: String::from(self.base_url_str()),
595 hierarchy_start: self.hierarchy_start,
596 netloc_start: self.netloc_start,
597 username_start: self.username_start,
598 password_start: self.password_start,
599 port_start: self.port_start,
600 path_start: None,
601 query_start: None,
602 fragment_start: None,
603 port: self.port,
604 }
605 }
606}
607
/// Check if a given string is a valid URL scheme, i.e. an ASCII letter
/// followed by any number of ASCII letters, digits, `+`, `-` or `.`.
fn is_valid_scheme(value: &str) -> bool {
    let mut rest = value.chars();

    match rest.next() {
        Some(first) if first.is_ascii_alphabetic() => {
            rest.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        }
        _ => false,
    }
}
620
621fn is_absolute_url(value: &str) -> bool {
623 value
624 .split_once(':')
625 .map(|(scheme, hierarchy)| is_valid_scheme(scheme) && hierarchy.starts_with("//"))
626 .unwrap_or(false)
627}
628
/// Percent-encoded view of borrowed input, as returned by `url_encode`.
///
/// The encoded text is produced when the value is formatted via `Display`.
#[derive(Clone)]
pub struct UrlEncoded<'a> {
    // Encoder from the `percent_encoding` crate borrowing the input.
    inner: PercentEncode<'a>,
}
634
635impl Display for UrlEncoded<'_> {
636 #[inline]
637 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
638 Display::fmt(&self.inner, f)
639 }
640}
641
642pub fn url_encode<'a, T>(s: &'a T) -> UrlEncoded<'a>
644where
645 T: AsRef<[u8]> + ?Sized,
646{
647 fn inner(input: &[u8]) -> UrlEncoded<'_> {
649 UrlEncoded {
650 inner: percent_encode(input, NON_ALPHANUMERIC),
651 }
652 }
653
654 inner(s.as_ref())
655}
656
657pub fn url_decode(s: &str) -> Cow<'_, [u8]> {
659 Cow::from(percent_decode_str(s))
660}
661
// Unit tests for URL parsing, the component accessors and `Url::join`.
#[cfg(test)]
mod test {
    use super::*;

    // A string without the scheme separator must be rejected.
    #[test]
    fn test_plain_hostname() {
        let url = Url::from_str("foo");

        assert!(url.is_err());
    }

    // A scheme that is not followed by `//` must be rejected.
    #[test]
    fn test_no_authority() {
        let url = Url::from_str("foo:bar");

        assert!(url.is_err());
    }

    // A port that does not fit into `u16` must be rejected.
    #[test]
    fn test_invalid_port() {
        let url = Url::from_str("http://foo:100000");

        assert!(url.is_err());
    }

    // Only the scheme and the host; the path defaults to `/`.
    #[test]
    fn test_minimal_url() {
        let url = Url::from_str("http://foo").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), None);
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // Host with a port and no path.
    // NOTE(review): despite the name, the input contains a non-empty port
    // (12); an actually empty port (`http://foo:`) is not covered here.
    #[test]
    fn test_empty_port() {
        let url = Url::from_str("http://foo:12").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), None);
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), Some(12));
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // `@` with nothing in front of it yields an empty username.
    #[test]
    fn test_empty_username() {
        let url = Url::from_str("http://@foo/some/path").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), Some(""));
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/some/path");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // User info without `:` has a username but no password.
    #[test]
    fn test_no_password() {
        let url = Url::from_str("http://user@foo/").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), Some("user"));
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // User info ending with `:` has an empty (but present) password.
    #[test]
    fn test_empty_password() {
        let url = Url::from_str("http://user:@foo/").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), Some("user"));
        assert_eq!(url.password(), Some(""));
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // Both the username and the password are present.
    #[test]
    fn test_password() {
        let url = Url::from_str("http://user:pass@foo/").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), Some("user"));
        assert_eq!(url.password(), Some("pass"));
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    // The path is followed by both a query and a fragment.
    #[test]
    fn test_fragment_and_query() {
        let url = Url::from_str("http://foo/some/path?and=query&a=b#and-fragment").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), None);
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/some/path");
        assert_eq!(url.query(), Some("and=query&a=b"));
        assert_eq!(url.fragment(), Some("and-fragment"));
    }

    // The path is followed by a query only.
    #[test]
    fn test_query_alone() {
        let url = Url::from_str("http://foo/some/path?and=query&a=b").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), None);
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/some/path");
        assert_eq!(url.query(), Some("and=query&a=b"));
        assert_eq!(url.fragment(), None);
    }

    // The path is followed by a fragment only.
    #[test]
    fn test_fragment_alone() {
        let url = Url::from_str("http://foo/some/path#and-fragment").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), None);
        assert_eq!(url.password(), None);
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/some/path");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), Some("and-fragment"));
    }

    // All the components are present at once.
    #[test]
    fn test_full_featured_url() {
        let url =
            Url::from_str("http://user:pass@foo:123/some/path?and=query&a=b#and-fragment").unwrap();

        assert_eq!(url.scheme(), "http");
        assert_eq!(url.username(), Some("user"));
        assert_eq!(url.password(), Some("pass"));
        assert_eq!(url.host(), "foo");
        assert_eq!(url.port(), Some(123));
        assert_eq!(url.path(), "/some/path");
        assert_eq!(url.query(), Some("and=query&a=b"));
        assert_eq!(url.fragment(), Some("and-fragment"));
    }

    // `Url::join` with an empty input, an absolute path, a relative path, a
    // scheme-relative URL and an absolute URL against several base URLs.
    #[test]
    fn test_joining() {
        // base URL without any path
        let base_url = Url::from_str("http://foo").unwrap();

        let n1 = base_url.join("").unwrap();
        let n2 = base_url.join("/foo").unwrap();
        let n3 = base_url.join("bar").unwrap();

        assert_eq!(n1.as_ref(), "http://foo");
        assert_eq!(n2.as_ref(), "http://foo/foo");
        assert_eq!(n3.as_ref(), "http://foo/bar");

        // base URL with the root path
        let base_url = Url::from_str("http://foo/").unwrap();

        let n1 = base_url.join("").unwrap();
        let n2 = base_url.join("/foo").unwrap();
        let n3 = base_url.join("bar").unwrap();

        assert_eq!(n1.as_ref(), "http://foo/");
        assert_eq!(n2.as_ref(), "http://foo/foo");
        assert_eq!(n3.as_ref(), "http://foo/bar");

        // base URL with a single path segment
        let base_url = Url::from_str("http://foo/hello").unwrap();

        let n1 = base_url.join("").unwrap();
        let n2 = base_url.join("/foo").unwrap();
        let n3 = base_url.join("bar").unwrap();

        assert_eq!(n1.as_ref(), "http://foo/hello");
        assert_eq!(n2.as_ref(), "http://foo/foo");
        assert_eq!(n3.as_ref(), "http://foo/bar");

        // base URL with multiple path segments; a relative input replaces
        // only the last segment
        let base_url = Url::from_str("http://foo/hello/world").unwrap();

        let n1 = base_url.join("").unwrap();
        let n2 = base_url.join("/foo").unwrap();
        let n3 = base_url.join("bar").unwrap();
        let n4 = base_url.join("//hello/world").unwrap();
        let n5 = base_url.join("https://hello/world").unwrap();

        assert_eq!(n1.as_ref(), "http://foo/hello/world");
        assert_eq!(n2.as_ref(), "http://foo/foo");
        assert_eq!(n3.as_ref(), "http://foo/hello/bar");
        assert_eq!(n4.as_ref(), "http://hello/world");
        assert_eq!(n5.as_ref(), "https://hello/world");
    }
}