1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3pub mod query;
6
7use std::{
8 borrow::Cow,
9 convert::TryFrom,
10 error::Error,
11 fmt::{self, Display, Formatter, Write},
12 str::FromStr,
13 sync::Arc,
14};
15
16use percent_encoding::{NON_ALPHANUMERIC, PercentEncode, percent_decode_str, percent_encode};
17
18pub use self::query::QueryDict;
19
/// Error returned when a string cannot be parsed as a URL.
///
/// The type is `Copy` and comparable, so callers can match on it or use it
/// directly in `assert_eq!`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum UrlParseError {
    /// The input has no valid scheme or is missing the `//` that introduces
    /// the authority.
    InvalidUrl,
    /// The port component could not be parsed as a 16-bit unsigned integer.
    InvalidPort,
}

impl Display for UrlParseError {
    /// Write a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::InvalidUrl => "invalid URL",
            Self::InvalidPort => "invalid port",
        })
    }
}

impl Error for UrlParseError {}
41
42#[derive(Clone)]
44pub struct Url {
45 inner: Arc<InnerUrl>,
46}
47
48impl Url {
49 #[inline]
51 pub fn scheme(&self) -> &str {
52 self.inner.scheme()
53 }
54
55 #[inline]
57 pub fn auth(&self) -> Option<&str> {
58 self.inner.auth()
59 }
60
61 #[inline]
63 pub fn username(&self) -> Option<&str> {
64 self.inner.username()
65 }
66
67 #[inline]
69 pub fn password(&self) -> Option<&str> {
70 self.inner.password()
71 }
72
73 #[inline]
75 pub fn netloc(&self) -> &str {
76 self.inner.netloc()
77 }
78
79 #[inline]
81 pub fn host(&self) -> &str {
82 self.inner.host()
83 }
84
85 #[inline]
87 pub fn port(&self) -> Option<u16> {
88 self.inner.port()
89 }
90
91 #[inline]
93 pub fn path(&self) -> &str {
94 self.inner.path()
95 }
96
97 #[inline]
99 pub fn path_with_query(&self) -> &str {
100 self.inner.path_with_query()
101 }
102
103 #[inline]
105 pub fn path_with_query_and_fragment(&self) -> &str {
106 self.inner.path_with_query_and_fragment()
107 }
108
109 #[inline]
111 pub fn query(&self) -> Option<&str> {
112 self.inner.query()
113 }
114
115 #[inline]
117 pub fn fragment(&self) -> Option<&str> {
118 self.inner.fragment()
119 }
120
121 pub fn base_url(&self) -> Self {
123 Self {
124 inner: Arc::new(self.inner.base_url()),
125 }
126 }
127
128 pub fn with_auth(&self, auth: Option<&str>) -> Result<Self, UrlParseError> {
132 let scheme = self.inner.scheme();
133 let netloc = self.inner.netloc();
134 let path = self.inner.path_with_query_and_fragment();
135
136 let url = if let Some(auth) = auth {
137 format!("{scheme}://{auth}@{netloc}{path}")
138 } else {
139 format!("{scheme}://{netloc}{path}")
140 };
141
142 Url::try_from(url)
143 }
144
145 pub fn with_credentials(&self, username: &str, password: &str) -> Result<Self, UrlParseError> {
149 let scheme = self.inner.scheme();
150 let netloc = self.inner.netloc();
151 let path = self.inner.path_with_query_and_fragment();
152
153 Url::try_from(format!("{scheme}://{username}:{password}@{netloc}{path}"))
154 }
155
156 pub fn with_netloc(&self, netloc: &str) -> Result<Self, UrlParseError> {
158 let scheme = self.inner.scheme();
159 let path = self.inner.path_with_query_and_fragment();
160
161 let url = if let Some(auth) = self.inner.auth() {
162 format!("{scheme}://{auth}@{netloc}{path}")
163 } else {
164 format!("{scheme}://{netloc}{path}")
165 };
166
167 Url::try_from(url)
168 }
169
170 pub fn with_query(&self, query: Option<&str>) -> Result<Self, UrlParseError> {
174 let base_url = self.inner.base_url_str();
175 let path = self.inner.path();
176
177 let mut url = format!("{base_url}{path}");
178
179 if let Some(query) = query {
180 let _ = write!(url, "?{query}");
181 }
182
183 if let Some(fragment) = self.inner.fragment() {
184 let _ = write!(url, "#{fragment}");
185 }
186
187 Url::try_from(url)
188 }
189
190 pub fn with_fragment(&self, fragment: Option<&str>) -> Result<Self, UrlParseError> {
194 let base_url = self.inner.base_url_str();
195 let path = self.inner.path();
196
197 let mut url = format!("{base_url}{path}");
198
199 if let Some(query) = self.inner.query() {
200 let _ = write!(url, "?{query}");
201 }
202
203 if let Some(fragment) = fragment {
204 let _ = write!(url, "#{fragment}");
205 }
206
207 Url::try_from(url)
208 }
209
210 pub fn join(&self, input: &str) -> Result<Self, UrlParseError> {
222 let scheme = self.inner.scheme();
223 let base_url = self.inner.base_url_str();
224
225 let input = input.trim();
226
227 if input.is_empty() {
228 Ok(self.clone())
229 } else if input.starts_with("//") {
230 Url::try_from(format!("{scheme}:{input}"))
231 } else if input.starts_with('/') {
232 Url::try_from(format!("{base_url}{input}"))
233 } else if is_absolute_url(input) {
234 Url::try_from(input.to_string())
235 } else {
236 let current_path = self.path();
237
238 let rightmost_separator = current_path
239 .rfind('/')
240 .expect("the path should contain at least one path separator");
241
242 let base_path = ¤t_path[..rightmost_separator];
243
244 Url::try_from(format!("{base_url}{base_path}/{input}"))
245 }
246 }
247}
248
249impl AsRef<str> for Url {
250 #[inline]
251 fn as_ref(&self) -> &str {
252 &self.inner.serialized
253 }
254}
255
256impl Display for Url {
257 #[inline]
258 fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
259 f.write_str(&self.inner.serialized)
260 }
261}
262
263impl FromStr for Url {
264 type Err = UrlParseError;
265
266 #[inline]
267 fn from_str(s: &str) -> Result<Url, UrlParseError> {
268 s.into_url()
269 }
270}
271
272impl TryFrom<String> for Url {
273 type Error = UrlParseError;
274
275 #[inline]
276 fn try_from(s: String) -> Result<Url, UrlParseError> {
277 s.into_url()
278 }
279}
280
281pub trait IntoUrl {
283 fn into_url(self) -> Result<Url, UrlParseError>;
285}
286
287impl IntoUrl for String {
288 #[inline(never)]
289 fn into_url(self) -> Result<Url, UrlParseError> {
290 let mut inner = InnerUrl {
291 serialized: self,
292 hierarchy_start: 0,
293 netloc_start: 0,
294 username_start: None,
295 password_start: None,
296 port_start: None,
297 path_start: None,
298 query_start: None,
299 fragment_start: None,
300 port: None,
301 };
302
303 inner.init()?;
304
305 let res = Url {
306 inner: Arc::new(inner),
307 };
308
309 Ok(res)
310 }
311}
312
313impl IntoUrl for &String {
314 #[inline]
315 fn into_url(self) -> Result<Url, UrlParseError> {
316 IntoUrl::into_url(String::from(self))
317 }
318}
319
320impl IntoUrl for &str {
321 #[inline]
322 fn into_url(self) -> Result<Url, UrlParseError> {
323 IntoUrl::into_url(String::from(self))
324 }
325}
326
327impl IntoUrl for Url {
328 #[inline]
329 fn into_url(self) -> Result<Url, UrlParseError> {
330 Ok(self)
331 }
332}
333
334impl IntoUrl for &Url {
335 #[inline]
336 fn into_url(self) -> Result<Url, UrlParseError> {
337 Ok(self.clone())
338 }
339}
340
/// Serialized URL together with the byte offsets of its components.
///
/// All offsets index into `serialized`.
struct InnerUrl {
    /// The URL string exactly as it was given to the parser.
    serialized: String,
    /// Offset just past the `:` that terminates the scheme.
    hierarchy_start: usize,
    /// Offset of the network location (just past the `@` if there is one).
    netloc_start: usize,
    /// Offset of the user info; set only when the authority contains `@`.
    username_start: Option<usize>,
    /// Offset just past the first `:` of the user info, if any.
    password_start: Option<usize>,
    /// Offset just past the `:` that introduces the port, if any.
    port_start: Option<usize>,
    /// Offset of the `/` that starts the path, if any.
    path_start: Option<usize>,
    /// Offset just past the `?` that introduces the query, if any.
    query_start: Option<usize>,
    /// Offset just past the `#` that introduces the fragment, if any.
    fragment_start: Option<usize>,
    /// Parsed port number, if the URL specifies one.
    port: Option<u16>,
}
354
355impl InnerUrl {
356 fn init(&mut self) -> Result<(), UrlParseError> {
358 if let Some(pos) = self.serialized.find(':') {
359 if !is_valid_scheme(&self.serialized[..pos]) {
360 return Err(UrlParseError::InvalidUrl);
361 }
362
363 self.process_hierarchy(pos + 1)
364 } else {
365 Err(UrlParseError::InvalidUrl)
366 }
367 }
368
369 fn process_hierarchy(&mut self, hierarchy_start: usize) -> Result<(), UrlParseError> {
371 self.hierarchy_start = hierarchy_start;
372
373 let suffix = &self.serialized[hierarchy_start..];
374
375 if !suffix.starts_with("//") {
376 return Err(UrlParseError::InvalidUrl);
377 }
378
379 let authority_start = hierarchy_start + 2;
380
381 let suffix = &self.serialized[authority_start..];
382
383 if let Some(pos) = suffix.find('/') {
384 let path_start = authority_start + pos;
385
386 self.process_authority(authority_start, path_start)?;
387 self.process_path(path_start);
388 } else {
389 self.process_authority(authority_start, self.serialized.len())?;
390 }
391
392 Ok(())
393 }
394
395 fn process_authority(
397 &mut self,
398 authority_start: usize,
399 authority_end: usize,
400 ) -> Result<(), UrlParseError> {
401 let authority = &self.serialized[authority_start..authority_end];
402
403 if let Some(pos) = authority.rfind('@') {
404 let user_info_end = authority_start + pos;
405
406 self.process_user_info(authority_start, user_info_end);
407
408 self.netloc_start = authority_start + pos + 1;
409 } else {
410 self.netloc_start = authority_start;
411 }
412
413 let netloc = &self.serialized[self.netloc_start..authority_end];
414
415 if !netloc.ends_with(']') {
416 if let Some(pos) = netloc.rfind(':') {
417 let port_start = pos + 1;
418
419 let port =
420 u16::from_str(&netloc[port_start..]).map_err(|_| UrlParseError::InvalidPort)?;
421
422 self.port_start = Some(self.netloc_start + port_start);
423 self.port = Some(port);
424 }
425 }
426
427 Ok(())
428 }
429
430 fn process_user_info(&mut self, user_info_start: usize, user_info_end: usize) {
432 self.username_start = Some(user_info_start);
433
434 let user_info = &self.serialized[user_info_start..user_info_end];
435
436 if let Some(pos) = user_info.find(':') {
437 self.password_start = Some(user_info_start + pos + 1);
438 }
439 }
440
441 fn process_path(&mut self, path_start: usize) {
443 self.path_start = Some(path_start);
444
445 let suffix = &self.serialized[path_start..];
446
447 if let Some(pos) = suffix.find('#') {
448 self.fragment_start = Some(path_start + pos + 1);
449 }
450
451 let path_or_query_end = self
452 .fragment_start
453 .map(|pos| pos - 1)
454 .unwrap_or(self.serialized.len());
455
456 let path_with_query = &self.serialized[path_start..path_or_query_end];
457
458 if let Some(pos) = path_with_query.find('?') {
459 self.query_start = Some(path_start + pos + 1);
460 }
461 }
462
463 #[inline]
465 fn scheme(&self) -> &str {
466 &self.serialized[..self.hierarchy_start - 1]
467 }
468
469 fn auth(&self) -> Option<&str> {
471 let start = self.username_start?;
472 let end = self.netloc_start - 1;
473
474 Some(&self.serialized[start..end])
475 }
476
477 fn username(&self) -> Option<&str> {
479 let start = self.username_start?;
480
481 let end = self.password_start.unwrap_or(self.netloc_start) - 1;
482
483 Some(&self.serialized[start..end])
484 }
485
486 fn password(&self) -> Option<&str> {
488 self.password_start
489 .map(|start| &self.serialized[start..self.netloc_start - 1])
490 }
491
492 fn netloc(&self) -> &str {
494 let end = self.path_start.unwrap_or(self.serialized.len());
495
496 &self.serialized[self.netloc_start..end]
497 }
498
499 fn host(&self) -> &str {
501 let end = self
502 .port_start
503 .map(|pos| pos - 1)
504 .or(self.path_start)
505 .unwrap_or(self.serialized.len());
506
507 &self.serialized[self.netloc_start..end]
508 }
509
510 #[inline]
512 fn port(&self) -> Option<u16> {
513 self.port
514 }
515
516 fn path(&self) -> &str {
518 if let Some(start) = self.path_start {
519 let end = self
520 .query_start
521 .or(self.fragment_start)
522 .map(|pos| pos - 1)
523 .unwrap_or(self.serialized.len());
524
525 &self.serialized[start..end]
526 } else {
527 "/"
528 }
529 }
530
531 fn query(&self) -> Option<&str> {
533 let start = self.query_start?;
534
535 let end = self
536 .fragment_start
537 .map(|pos| pos - 1)
538 .unwrap_or(self.serialized.len());
539
540 Some(&self.serialized[start..end])
541 }
542
543 fn path_with_query(&self) -> &str {
545 if let Some(start) = self.path_start {
546 let end = self
547 .fragment_start
548 .map(|idx| idx - 1)
549 .unwrap_or(self.serialized.len());
550
551 &self.serialized[start..end]
552 } else {
553 "/"
554 }
555 }
556
557 fn path_with_query_and_fragment(&self) -> &str {
559 if let Some(start) = self.path_start {
560 &self.serialized[start..]
561 } else {
562 "/"
563 }
564 }
565
566 fn fragment(&self) -> Option<&str> {
568 self.fragment_start.map(|start| &self.serialized[start..])
569 }
570
571 fn base_url_str(&self) -> &str {
576 let end = self.path_start.unwrap_or(self.serialized.len());
577
578 &self.serialized[..end]
579 }
580
581 fn base_url(&self) -> Self {
584 Self {
585 serialized: String::from(self.base_url_str()),
586 hierarchy_start: self.hierarchy_start,
587 netloc_start: self.netloc_start,
588 username_start: self.username_start,
589 password_start: self.password_start,
590 port_start: self.port_start,
591 path_start: None,
592 query_start: None,
593 fragment_start: None,
594 port: self.port,
595 }
596 }
597}
598
/// Check if `value` is a valid URL scheme.
///
/// A valid scheme starts with an ASCII letter that is followed by any number
/// of ASCII letters, ASCII digits, `+`, `-` or `.` characters. An empty
/// string is not a valid scheme.
fn is_valid_scheme(value: &str) -> bool {
    // Working on bytes is equivalent to working on chars here: every byte of
    // a multi-byte UTF-8 sequence is non-ASCII and therefore rejected.
    match value.as_bytes().split_first() {
        Some((first, rest)) => {
            first.is_ascii_alphabetic()
                && rest
                    .iter()
                    .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
        }
        None => false,
    }
}
611
612fn is_absolute_url(value: &str) -> bool {
614 value
615 .split_once(':')
616 .map(|(scheme, hierarchy)| is_valid_scheme(scheme) && hierarchy.starts_with("//"))
617 .unwrap_or(false)
618}
619
620#[derive(Clone)]
622pub struct UrlEncoded<'a> {
623 inner: PercentEncode<'a>,
624}
625
626impl Display for UrlEncoded<'_> {
627 #[inline]
628 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
629 Display::fmt(&self.inner, f)
630 }
631}
632
633pub fn url_encode<'a, T>(s: &'a T) -> UrlEncoded<'a>
635where
636 T: AsRef<[u8]> + ?Sized,
637{
638 fn inner(input: &[u8]) -> UrlEncoded<'_> {
640 UrlEncoded {
641 inner: percent_encode(input, NON_ALPHANUMERIC),
642 }
643 }
644
645 inner(s.as_ref())
646}
647
648pub fn url_decode(s: &str) -> Cow<'_, [u8]> {
650 Cow::from(percent_decode_str(s))
651}
652
#[cfg(test)]
mod test {
    use super::*;

    /// Snapshot of all URL components, so that a whole URL can be checked
    /// with a single `assert_eq!`.
    #[derive(Debug, PartialEq)]
    struct Components<'a> {
        scheme: &'a str,
        username: Option<&'a str>,
        password: Option<&'a str>,
        host: &'a str,
        port: Option<u16>,
        path: &'a str,
        query: Option<&'a str>,
        fragment: Option<&'a str>,
    }

    impl<'a> Components<'a> {
        /// Collect the components of `url` using its public accessors.
        fn of(url: &'a Url) -> Self {
            Self {
                scheme: url.scheme(),
                username: url.username(),
                password: url.password(),
                host: url.host(),
                port: url.port(),
                path: url.path(),
                query: url.query(),
                fragment: url.fragment(),
            }
        }
    }

    /// Components of `http://foo`; the tests override what differs.
    const MINIMAL: Components<'static> = Components {
        scheme: "http",
        username: None,
        password: None,
        host: "foo",
        port: None,
        path: "/",
        query: None,
        fragment: None,
    };

    #[test]
    fn test_plain_hostname() {
        let res = Url::from_str("foo");

        assert!(matches!(res, Err(UrlParseError::InvalidUrl)));
    }

    #[test]
    fn test_no_authority() {
        let res = Url::from_str("foo:bar");

        assert!(matches!(res, Err(UrlParseError::InvalidUrl)));
    }

    #[test]
    fn test_invalid_port() {
        let res = Url::from_str("http://foo:100000");

        assert!(matches!(res, Err(UrlParseError::InvalidPort)));
    }

    #[test]
    fn test_minimal_url() {
        let url = Url::from_str("http://foo").unwrap();

        assert_eq!(Components::of(&url), MINIMAL);
    }

    #[test]
    fn test_port() {
        let url = Url::from_str("http://foo:12").unwrap();

        let expected = Components {
            port: Some(12),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_ipv6_host() {
        let url = Url::from_str("http://[::1]:8080/x").unwrap();

        let expected = Components {
            host: "[::1]",
            port: Some(8080),
            path: "/x",
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);

        // The colons inside the brackets must not be taken for a port.
        let url = Url::from_str("http://[::1]/x").unwrap();

        let expected = Components {
            host: "[::1]",
            path: "/x",
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_empty_username() {
        let url = Url::from_str("http://@foo/some/path").unwrap();

        let expected = Components {
            username: Some(""),
            path: "/some/path",
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_no_password() {
        let url = Url::from_str("http://user@foo/").unwrap();

        let expected = Components {
            username: Some("user"),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_empty_password() {
        let url = Url::from_str("http://user:@foo/").unwrap();

        let expected = Components {
            username: Some("user"),
            password: Some(""),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_password() {
        let url = Url::from_str("http://user:pass@foo/").unwrap();

        let expected = Components {
            username: Some("user"),
            password: Some("pass"),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_fragment_and_query() {
        let url = Url::from_str("http://foo/some/path?and=query&a=b#and-fragment").unwrap();

        let expected = Components {
            path: "/some/path",
            query: Some("and=query&a=b"),
            fragment: Some("and-fragment"),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_query_alone() {
        let url = Url::from_str("http://foo/some/path?and=query&a=b").unwrap();

        let expected = Components {
            path: "/some/path",
            query: Some("and=query&a=b"),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_fragment_alone() {
        let url = Url::from_str("http://foo/some/path#and-fragment").unwrap();

        let expected = Components {
            path: "/some/path",
            fragment: Some("and-fragment"),
            ..MINIMAL
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_full_featured_url() {
        let url =
            Url::from_str("http://user:pass@foo:123/some/path?and=query&a=b#and-fragment").unwrap();

        let expected = Components {
            scheme: "http",
            username: Some("user"),
            password: Some("pass"),
            host: "foo",
            port: Some(123),
            path: "/some/path",
            query: Some("and=query&a=b"),
            fragment: Some("and-fragment"),
        };

        assert_eq!(Components::of(&url), expected);
    }

    #[test]
    fn test_joining() {
        // (base URL, joined input, expected result)
        let cases = [
            ("http://foo", "", "http://foo"),
            ("http://foo", "/foo", "http://foo/foo"),
            ("http://foo", "bar", "http://foo/bar"),
            ("http://foo/", "", "http://foo/"),
            ("http://foo/", "/foo", "http://foo/foo"),
            ("http://foo/", "bar", "http://foo/bar"),
            ("http://foo/hello", "", "http://foo/hello"),
            ("http://foo/hello", "/foo", "http://foo/foo"),
            ("http://foo/hello", "bar", "http://foo/bar"),
            ("http://foo/hello/world", "", "http://foo/hello/world"),
            ("http://foo/hello/world", "/foo", "http://foo/foo"),
            ("http://foo/hello/world", "bar", "http://foo/hello/bar"),
            ("http://foo/hello/world", "//hello/world", "http://hello/world"),
            (
                "http://foo/hello/world",
                "https://hello/world",
                "https://hello/world",
            ),
        ];

        for (base, input, expected) in cases {
            let base_url = Url::from_str(base).unwrap();
            let joined = base_url.join(input).unwrap();

            assert_eq!(
                AsRef::<str>::as_ref(&joined),
                expected,
                "joining {input:?} onto {base:?}"
            );
        }
    }
}