url/lib.rs
1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
11rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12for the [Rust](http://rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Serde
77
78Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79
80# Base URL
81
82Many contexts allow URL *references* that can be relative to a *base URL*:
83
84```html
85<link rel="stylesheet" href="../main.css">
86```
87
88Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89
90```
91use url::{Url, ParseError};
92
93assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94```
95
96Use the `join` method on an `Url` to use it as a base URL:
97
98```
99use url::Url;
100# use url::ParseError;
101
102# fn run() -> Result<(), ParseError> {
103let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104let css_url = this_document.join("../main.css")?;
105assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106# Ok(())
107# }
108# run().unwrap();
109```
110
111# Feature: `serde`
112
113If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116See [serde documentation](https://serde.rs) for more information.
117
118```toml
119url = { version = "2", features = ["serde"] }
120```
121
122# Feature: `debugger_visualizer`
123
124If you enable the `debugger_visualizer` feature, the `url` crate will include
125a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
126for [Visual Studio](https://www.visualstudio.com/) that allows you to view
127[`Url`](struct.Url.html) objects in the debugger.
128
129This feature requires Rust 1.71 or later.
130
131```toml
132url = { version = "2", features = ["debugger_visualizer"] }
133```
134
135*/
136
137#![doc(html_root_url = "https://docs.rs/url/2.5.0")]
138#![cfg_attr(
139 feature = "debugger_visualizer",
140 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
141)]
142
143pub use form_urlencoded;
144
145#[cfg(feature = "serde")]
146extern crate serde;
147
148use crate::host::HostInternal;
149use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
150use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
151use std::borrow::Borrow;
152use std::cmp;
153use std::fmt::{self, Write};
154use std::hash;
155use std::io;
156use std::mem;
157use scionnet::{IpAddr, SocketAddr, ToSocketAddrs};
158use std::ops::{Range, RangeFrom, RangeTo};
159use std::path::{Path, PathBuf};
160use std::str;
161
162use std::convert::TryFrom;
163
164pub use crate::host::Host;
165pub use crate::origin::{OpaqueOrigin, Origin};
166pub use crate::parser::{ParseError, SyntaxViolation};
167pub use crate::path_segments::PathSegmentsMut;
168pub use crate::slicing::Position;
169pub use form_urlencoded::EncodingOverride;
170
171mod host;
172mod origin;
173mod parser;
174mod path_segments;
175mod slicing;
176
177#[doc(hidden)]
178pub mod quirks;
179
180/// A parsed URL record.
181#[derive(Clone)]
182pub struct Url {
183 /// Syntax in pseudo-BNF:
184 ///
185 /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
186 /// non-hierarchical = non-hierarchical-path
187 /// non-hierarchical-path = /* Does not start with "/" */
188 /// hierarchical = authority? hierarchical-path
189 /// authority = "//" userinfo? host [ ":" port ]?
190 /// userinfo = username [ ":" password ]? "@"
191 /// hierarchical-path = [ "/" path-segment ]+
192 serialization: String,
193
194 // Components
195 scheme_end: u32, // Before ':'
196 username_end: u32, // Before ':' (if a password is given) or '@' (if not)
197 host_start: u32,
198 host_end: u32,
199 host: HostInternal,
200 port: Option<u16>,
201 path_start: u32, // Before initial '/', if any
202 query_start: Option<u32>, // Before '?', unlike Position::QueryStart
203 fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
204}
205
206/// Full configuration for the URL parser.
207#[derive(Copy, Clone)]
208#[must_use]
209pub struct ParseOptions<'a> {
210 base_url: Option<&'a Url>,
211 encoding_override: EncodingOverride<'a>,
212 violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
213}
214
215impl<'a> ParseOptions<'a> {
216 /// Change the base URL
217 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
218 self.base_url = new;
219 self
220 }
221
222 /// Override the character encoding of query strings.
223 /// This is a legacy concept only relevant for HTML.
224 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
225 self.encoding_override = new;
226 self
227 }
228
229 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
230 /// when it occurs during parsing. Note that since the provided function is
231 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
232 /// a `RefCell`, to collect the violations.
233 ///
234 /// ## Example
235 /// ```
236 /// use std::cell::RefCell;
237 /// use url::{Url, SyntaxViolation};
238 /// # use url::ParseError;
239 /// # fn run() -> Result<(), url::ParseError> {
240 /// let violations = RefCell::new(Vec::new());
241 /// let url = Url::options()
242 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
243 /// .parse("https:////example.com")?;
244 /// assert_eq!(url.as_str(), "https://example.com/");
245 /// assert_eq!(violations.into_inner(),
246 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
247 /// # Ok(())
248 /// # }
249 /// # run().unwrap();
250 /// ```
251 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
252 self.violation_fn = new;
253 self
254 }
255
256 /// Parse an URL string with the configuration so far.
257 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
258 Parser {
259 serialization: String::with_capacity(input.len()),
260 base_url: self.base_url,
261 query_encoding_override: self.encoding_override,
262 violation_fn: self.violation_fn,
263 context: Context::UrlParser,
264 }
265 .parse_url(input)
266 }
267}
268
269impl Url {
270 /// Parse an absolute URL from a string.
271 ///
272 /// # Examples
273 ///
274 /// ```rust
275 /// use url::Url;
276 /// # use url::ParseError;
277 ///
278 /// # fn run() -> Result<(), ParseError> {
279 /// let url = Url::parse("https://example.net")?;
280 /// # Ok(())
281 /// # }
282 /// # run().unwrap();
283 /// ```
284 ///
285 /// # Errors
286 ///
287 /// If the function can not parse an absolute URL from the given string,
288 /// a [`ParseError`] variant will be returned.
289 ///
290 /// [`ParseError`]: enum.ParseError.html
291 #[inline]
292 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
293 Url::options().parse(input)
294 }
295
296 /// Parse an absolute URL from a string and add params to its query string.
297 ///
298 /// Existing params are not removed.
299 ///
300 /// # Examples
301 ///
302 /// ```rust
303 /// use url::Url;
304 /// # use url::ParseError;
305 ///
306 /// # fn run() -> Result<(), ParseError> {
307 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
308 /// &[("lang", "rust"), ("browser", "servo")])?;
309 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
310 /// # Ok(())
311 /// # }
312 /// # run().unwrap();
313 /// ```
314 ///
315 /// # Errors
316 ///
317 /// If the function can not parse an absolute URL from the given string,
318 /// a [`ParseError`] variant will be returned.
319 ///
320 /// [`ParseError`]: enum.ParseError.html
321 #[inline]
322 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
323 where
324 I: IntoIterator,
325 I::Item: Borrow<(K, V)>,
326 K: AsRef<str>,
327 V: AsRef<str>,
328 {
329 let mut url = Url::options().parse(input);
330
331 if let Ok(ref mut url) = url {
332 url.query_pairs_mut().extend_pairs(iter);
333 }
334
335 url
336 }
337
338 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
339 fn strip_trailing_spaces_from_opaque_path(&mut self) {
340 if !self.cannot_be_a_base() {
341 return;
342 }
343
344 if self.fragment_start.is_some() {
345 return;
346 }
347
348 if self.query_start.is_some() {
349 return;
350 }
351
352 let trailing_space_count = self
353 .serialization
354 .chars()
355 .rev()
356 .take_while(|c| *c == ' ')
357 .count();
358
359 let start = self.serialization.len() - trailing_space_count;
360
361 self.serialization.truncate(start);
362 }
363
364 /// Parse a string as an URL, with this URL as the base URL.
365 ///
366 /// The inverse of this is [`make_relative`].
367 ///
368 /// Note: a trailing slash is significant.
369 /// Without it, the last path component is considered to be a “file” name
370 /// to be removed to get at the “directory” that is used as the base:
371 ///
372 /// # Examples
373 ///
374 /// ```rust
375 /// use url::Url;
376 /// # use url::ParseError;
377 ///
378 /// # fn run() -> Result<(), ParseError> {
379 /// let base = Url::parse("https://example.net/a/b.html")?;
380 /// let url = base.join("c.png")?;
381 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
382 ///
383 /// let base = Url::parse("https://example.net/a/b/")?;
384 /// let url = base.join("c.png")?;
385 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
386 /// # Ok(())
387 /// # }
388 /// # run().unwrap();
389 /// ```
390 ///
391 /// # Errors
392 ///
393 /// If the function can not parse an URL from the given string
394 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
395 ///
396 /// [`ParseError`]: enum.ParseError.html
397 /// [`make_relative`]: #method.make_relative
398 #[inline]
399 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
400 Url::options().base_url(Some(self)).parse(input)
401 }
402
403 /// Creates a relative URL if possible, with this URL as the base URL.
404 ///
405 /// This is the inverse of [`join`].
406 ///
407 /// # Examples
408 ///
409 /// ```rust
410 /// use url::Url;
411 /// # use url::ParseError;
412 ///
413 /// # fn run() -> Result<(), ParseError> {
414 /// let base = Url::parse("https://example.net/a/b.html")?;
415 /// let url = Url::parse("https://example.net/a/c.png")?;
416 /// let relative = base.make_relative(&url);
417 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
418 ///
419 /// let base = Url::parse("https://example.net/a/b/")?;
420 /// let url = Url::parse("https://example.net/a/b/c.png")?;
421 /// let relative = base.make_relative(&url);
422 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
423 ///
424 /// let base = Url::parse("https://example.net/a/b/")?;
425 /// let url = Url::parse("https://example.net/a/d/c.png")?;
426 /// let relative = base.make_relative(&url);
427 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
428 ///
429 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
430 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
431 /// let relative = base.make_relative(&url);
432 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
433 /// # Ok(())
434 /// # }
435 /// # run().unwrap();
436 /// ```
437 ///
438 /// # Errors
439 ///
440 /// If this URL can't be a base for the given URL, `None` is returned.
441 /// This is for example the case if the scheme, host or port are not the same.
442 ///
443 /// [`join`]: #method.join
444 pub fn make_relative(&self, url: &Url) -> Option<String> {
445 if self.cannot_be_a_base() {
446 return None;
447 }
448
449 // Scheme, host and port need to be the same
450 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
451 return None;
452 }
453
454 // We ignore username/password at this point
455
456 // The path has to be transformed
457 let mut relative = String::new();
458
459 // Extract the filename of both URIs, these need to be handled separately
460 fn extract_path_filename(s: &str) -> (&str, &str) {
461 let last_slash_idx = s.rfind('/').unwrap_or(0);
462 let (path, filename) = s.split_at(last_slash_idx);
463 if filename.is_empty() {
464 (path, "")
465 } else {
466 (path, &filename[1..])
467 }
468 }
469
470 let (base_path, base_filename) = extract_path_filename(self.path());
471 let (url_path, url_filename) = extract_path_filename(url.path());
472
473 let mut base_path = base_path.split('/').peekable();
474 let mut url_path = url_path.split('/').peekable();
475
476 // Skip over the common prefix
477 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
478 base_path.next();
479 url_path.next();
480 }
481
482 // Add `..` segments for the remainder of the base path
483 for base_path_segment in base_path {
484 // Skip empty last segments
485 if base_path_segment.is_empty() {
486 break;
487 }
488
489 if !relative.is_empty() {
490 relative.push('/');
491 }
492
493 relative.push_str("..");
494 }
495
496 // Append the remainder of the other URI
497 for url_path_segment in url_path {
498 if !relative.is_empty() {
499 relative.push('/');
500 }
501
502 relative.push_str(url_path_segment);
503 }
504
505 // Add the filename if they are not the same
506 if !relative.is_empty() || base_filename != url_filename {
507 // If the URIs filename is empty this means that it was a directory
508 // so we'll have to append a '/'.
509 //
510 // Otherwise append it directly as the new filename.
511 if url_filename.is_empty() {
512 relative.push('/');
513 } else {
514 if !relative.is_empty() {
515 relative.push('/');
516 }
517 relative.push_str(url_filename);
518 }
519 }
520
521 // Query and fragment are only taken from the other URI
522 if let Some(query) = url.query() {
523 relative.push('?');
524 relative.push_str(query);
525 }
526
527 if let Some(fragment) = url.fragment() {
528 relative.push('#');
529 relative.push_str(fragment);
530 }
531
532 Some(relative)
533 }
534
535 /// Return a default `ParseOptions` that can fully configure the URL parser.
536 ///
537 /// # Examples
538 ///
539 /// Get default `ParseOptions`, then change base url
540 ///
541 /// ```rust
542 /// use url::Url;
543 /// # use url::ParseError;
544 /// # fn run() -> Result<(), ParseError> {
545 /// let options = Url::options();
546 /// let api = Url::parse("https://api.example.com")?;
547 /// let base_url = options.base_url(Some(&api));
548 /// let version_url = base_url.parse("version.json")?;
549 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
550 /// # Ok(())
551 /// # }
552 /// # run().unwrap();
553 /// ```
554 pub fn options<'a>() -> ParseOptions<'a> {
555 ParseOptions {
556 base_url: None,
557 encoding_override: None,
558 violation_fn: None,
559 }
560 }
561
562 /// Return the serialization of this URL.
563 ///
564 /// This is fast since that serialization is already stored in the `Url` struct.
565 ///
566 /// # Examples
567 ///
568 /// ```rust
569 /// use url::Url;
570 /// # use url::ParseError;
571 ///
572 /// # fn run() -> Result<(), ParseError> {
573 /// let url_str = "https://example.net/";
574 /// let url = Url::parse(url_str)?;
575 /// assert_eq!(url.as_str(), url_str);
576 /// # Ok(())
577 /// # }
578 /// # run().unwrap();
579 /// ```
580 #[inline]
581 pub fn as_str(&self) -> &str {
582 &self.serialization
583 }
584
585 /// Return the serialization of this URL.
586 ///
587 /// This consumes the `Url` and takes ownership of the `String` stored in it.
588 ///
589 /// # Examples
590 ///
591 /// ```rust
592 /// use url::Url;
593 /// # use url::ParseError;
594 ///
595 /// # fn run() -> Result<(), ParseError> {
596 /// let url_str = "https://example.net/";
597 /// let url = Url::parse(url_str)?;
598 /// assert_eq!(String::from(url), url_str);
599 /// # Ok(())
600 /// # }
601 /// # run().unwrap();
602 /// ```
603 #[inline]
604 #[deprecated(since = "2.3.0", note = "use Into<String>")]
605 pub fn into_string(self) -> String {
606 self.into()
607 }
608
609 /// For internal testing, not part of the public API.
610 ///
611 /// Methods of the `Url` struct assume a number of invariants.
612 /// This checks each of these invariants and panic if one is not met.
613 /// This is for testing rust-url itself.
614 #[doc(hidden)]
615 pub fn check_invariants(&self) -> Result<(), String> {
616 macro_rules! assert {
617 ($x: expr) => {
618 if !$x {
619 return Err(format!(
620 "!( {} ) for URL {:?}",
621 stringify!($x),
622 self.serialization
623 ));
624 }
625 };
626 }
627
628 macro_rules! assert_eq {
629 ($a: expr, $b: expr) => {
630 {
631 let a = $a;
632 let b = $b;
633 if a != b {
634 return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
635 a, b, stringify!($a), stringify!($b),
636 self.serialization))
637 }
638 }
639 }
640 }
641
642 assert!(self.scheme_end >= 1);
643 assert!(self.byte_at(0).is_ascii_alphabetic());
644 assert!(self
645 .slice(1..self.scheme_end)
646 .chars()
647 .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
648 assert_eq!(self.byte_at(self.scheme_end), b':');
649
650 if self.slice(self.scheme_end + 1..).starts_with("//") {
651 // URL with authority
652 if self.username_end != self.serialization.len() as u32 {
653 match self.byte_at(self.username_end) {
654 b':' => {
655 assert!(self.host_start >= self.username_end + 2);
656 assert_eq!(self.byte_at(self.host_start - 1), b'@');
657 }
658 b'@' => assert!(self.host_start == self.username_end + 1),
659 _ => assert_eq!(self.username_end, self.scheme_end + 3),
660 }
661 }
662 assert!(self.host_start >= self.username_end);
663 assert!(self.host_end >= self.host_start);
664 let host_str = self.slice(self.host_start..self.host_end);
665 match self.host {
666 HostInternal::None => assert_eq!(host_str, ""),
667 HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
668 HostInternal::Ipv6(address) => {
669 let h: Host<String> = Host::Ipv6(address);
670 assert_eq!(host_str, h.to_string())
671 },
672 HostInternal::Scion(address) => {
673 let h: Host<String> = Host::Scion(address);
674 assert_eq!(host_str, h.to_string())
675 },
676 HostInternal::Domain => {
677 if SchemeType::from(self.scheme()).is_special() {
678 assert!(!host_str.is_empty())
679 }
680 }
681 }
682 if self.path_start == self.host_end {
683 assert_eq!(self.port, None);
684 } else {
685 assert_eq!(self.byte_at(self.host_end), b':');
686 let port_str = self.slice(self.host_end + 1..self.path_start);
687 assert_eq!(
688 self.port,
689 Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
690 );
691 }
692 assert!(
693 self.path_start as usize == self.serialization.len()
694 || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
695 );
696 } else {
697 // Anarchist URL (no authority)
698 assert_eq!(self.username_end, self.scheme_end + 1);
699 assert_eq!(self.host_start, self.scheme_end + 1);
700 assert_eq!(self.host_end, self.scheme_end + 1);
701 assert_eq!(self.host, HostInternal::None);
702 assert_eq!(self.port, None);
703 if self.path().starts_with("//") {
704 // special case when first path segment is empty
705 assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
706 assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
707 assert_eq!(self.path_start, self.scheme_end + 3);
708 } else {
709 assert_eq!(self.path_start, self.scheme_end + 1);
710 }
711 }
712 if let Some(start) = self.query_start {
713 assert!(start >= self.path_start);
714 assert_eq!(self.byte_at(start), b'?');
715 }
716 if let Some(start) = self.fragment_start {
717 assert!(start >= self.path_start);
718 assert_eq!(self.byte_at(start), b'#');
719 }
720 if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
721 assert!(fragment_start > query_start);
722 }
723
724 let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
725 assert_eq!(&self.serialization, &other.serialization);
726 assert_eq!(self.scheme_end, other.scheme_end);
727 assert_eq!(self.username_end, other.username_end);
728 assert_eq!(self.host_start, other.host_start);
729 assert_eq!(self.host_end, other.host_end);
730 assert!(
731 self.host == other.host ||
732 // XXX No host round-trips to empty host.
733 // See https://github.com/whatwg/url/issues/79
734 (self.host_str(), other.host_str()) == (None, Some(""))
735 );
736 assert_eq!(self.port, other.port);
737 assert_eq!(self.path_start, other.path_start);
738 assert_eq!(self.query_start, other.query_start);
739 assert_eq!(self.fragment_start, other.fragment_start);
740 Ok(())
741 }
742
743 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
744 ///
745 /// Note: this returns an opaque origin for `file:` URLs, which causes
746 /// `url.origin() != url.origin()`.
747 ///
748 /// # Examples
749 ///
750 /// URL with `ftp` scheme:
751 ///
752 /// ```rust
753 /// use url::{Host, Origin, Url};
754 /// # use url::ParseError;
755 ///
756 /// # fn run() -> Result<(), ParseError> {
757 /// let url = Url::parse("ftp://example.com/foo")?;
758 /// assert_eq!(url.origin(),
759 /// Origin::Tuple("ftp".into(),
760 /// Host::Domain("example.com".into()),
761 /// 21));
762 /// # Ok(())
763 /// # }
764 /// # run().unwrap();
765 /// ```
766 ///
767 /// URL with `blob` scheme:
768 ///
769 /// ```rust
770 /// use url::{Host, Origin, Url};
771 /// # use url::ParseError;
772 ///
773 /// # fn run() -> Result<(), ParseError> {
774 /// let url = Url::parse("blob:https://example.com/foo")?;
775 /// assert_eq!(url.origin(),
776 /// Origin::Tuple("https".into(),
777 /// Host::Domain("example.com".into()),
778 /// 443));
779 /// # Ok(())
780 /// # }
781 /// # run().unwrap();
782 /// ```
783 ///
784 /// URL with `file` scheme:
785 ///
786 /// ```rust
787 /// use url::{Host, Origin, Url};
788 /// # use url::ParseError;
789 ///
790 /// # fn run() -> Result<(), ParseError> {
791 /// let url = Url::parse("file:///tmp/foo")?;
792 /// assert!(!url.origin().is_tuple());
793 ///
794 /// let other_url = Url::parse("file:///tmp/foo")?;
795 /// assert!(url.origin() != other_url.origin());
796 /// # Ok(())
797 /// # }
798 /// # run().unwrap();
799 /// ```
800 ///
801 /// URL with other scheme:
802 ///
803 /// ```rust
804 /// use url::{Host, Origin, Url};
805 /// # use url::ParseError;
806 ///
807 /// # fn run() -> Result<(), ParseError> {
808 /// let url = Url::parse("foo:bar")?;
809 /// assert!(!url.origin().is_tuple());
810 /// # Ok(())
811 /// # }
812 /// # run().unwrap();
813 /// ```
814 #[inline]
815 pub fn origin(&self) -> Origin {
816 origin::url_origin(self)
817 }
818
819 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
820 ///
821 /// # Examples
822 ///
823 /// ```
824 /// use url::Url;
825 /// # use url::ParseError;
826 ///
827 /// # fn run() -> Result<(), ParseError> {
828 /// let url = Url::parse("file:///tmp/foo")?;
829 /// assert_eq!(url.scheme(), "file");
830 /// # Ok(())
831 /// # }
832 /// # run().unwrap();
833 /// ```
834 #[inline]
835 pub fn scheme(&self) -> &str {
836 self.slice(..self.scheme_end)
837 }
838
839 /// Return whether the URL is special (has a special scheme)
840 ///
841 /// # Examples
842 ///
843 /// ```
844 /// use url::Url;
845 /// # use url::ParseError;
846 ///
847 /// # fn run() -> Result<(), ParseError> {
848 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
849 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
850 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
851 /// # Ok(())
852 /// # }
853 /// # run().unwrap();
854 /// ```
855 pub fn is_special(&self) -> bool {
856 let scheme_type = SchemeType::from(self.scheme());
857 scheme_type.is_special()
858 }
859
860 /// Return whether the URL has an 'authority',
861 /// which can contain a username, password, host, and port number.
862 ///
863 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
864 /// or cannot-be-a-base like `data:text/plain,Stuff`.
865 ///
866 /// See also the `authority` method.
867 ///
868 /// # Examples
869 ///
870 /// ```
871 /// use url::Url;
872 /// # use url::ParseError;
873 ///
874 /// # fn run() -> Result<(), ParseError> {
875 /// let url = Url::parse("ftp://rms@example.com")?;
876 /// assert!(url.has_authority());
877 ///
878 /// let url = Url::parse("unix:/run/foo.socket")?;
879 /// assert!(!url.has_authority());
880 ///
881 /// let url = Url::parse("data:text/plain,Stuff")?;
882 /// assert!(!url.has_authority());
883 /// # Ok(())
884 /// # }
885 /// # run().unwrap();
886 /// ```
887 #[inline]
888 pub fn has_authority(&self) -> bool {
889 debug_assert!(self.byte_at(self.scheme_end) == b':');
890 self.slice(self.scheme_end..).starts_with("://")
891 }
892
893 /// Return the authority of this URL as an ASCII string.
894 ///
895 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
896 /// of a special URL, or percent encoded for non-special URLs.
897 /// IPv6 addresses are given between `[` and `]` brackets.
898 /// Ports are omitted if they match the well known port of a special URL.
899 ///
900 /// Username and password are percent-encoded.
901 ///
902 /// See also the `has_authority` method.
903 ///
904 /// # Examples
905 ///
906 /// ```
907 /// use url::Url;
908 /// # use url::ParseError;
909 ///
910 /// # fn run() -> Result<(), ParseError> {
911 /// let url = Url::parse("unix:/run/foo.socket")?;
912 /// assert_eq!(url.authority(), "");
913 /// let url = Url::parse("file:///tmp/foo")?;
914 /// assert_eq!(url.authority(), "");
915 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
916 /// assert_eq!(url.authority(), "user:password@example.com");
917 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
918 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
919 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
920 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
921 /// # Ok(())
922 /// # }
923 /// # run().unwrap();
924 /// ```
925 pub fn authority(&self) -> &str {
926 let scheme_separator_len = "://".len() as u32;
927 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
928 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
929 } else {
930 ""
931 }
932 }
933
934 /// Return whether this URL is a cannot-be-a-base URL,
935 /// meaning that parsing a relative URL string with this URL as the base will return an error.
936 ///
937 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
938 /// as is typically the case of `data:` and `mailto:` URLs.
939 ///
940 /// # Examples
941 ///
942 /// ```
943 /// use url::Url;
944 /// # use url::ParseError;
945 ///
946 /// # fn run() -> Result<(), ParseError> {
947 /// let url = Url::parse("ftp://rms@example.com")?;
948 /// assert!(!url.cannot_be_a_base());
949 ///
950 /// let url = Url::parse("unix:/run/foo.socket")?;
951 /// assert!(!url.cannot_be_a_base());
952 ///
953 /// let url = Url::parse("data:text/plain,Stuff")?;
954 /// assert!(url.cannot_be_a_base());
955 /// # Ok(())
956 /// # }
957 /// # run().unwrap();
958 /// ```
959 #[inline]
960 pub fn cannot_be_a_base(&self) -> bool {
961 !self.slice(self.scheme_end + 1..).starts_with('/')
962 }
963
964 /// Return the username for this URL (typically the empty string)
965 /// as a percent-encoded ASCII string.
966 ///
967 /// # Examples
968 ///
969 /// ```
970 /// use url::Url;
971 /// # use url::ParseError;
972 ///
973 /// # fn run() -> Result<(), ParseError> {
974 /// let url = Url::parse("ftp://rms@example.com")?;
975 /// assert_eq!(url.username(), "rms");
976 ///
977 /// let url = Url::parse("ftp://:secret123@example.com")?;
978 /// assert_eq!(url.username(), "");
979 ///
980 /// let url = Url::parse("https://example.com")?;
981 /// assert_eq!(url.username(), "");
982 /// # Ok(())
983 /// # }
984 /// # run().unwrap();
985 /// ```
986 pub fn username(&self) -> &str {
987 let scheme_separator_len = "://".len() as u32;
988 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
989 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
990 } else {
991 ""
992 }
993 }
994
995 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
996 ///
997 /// # Examples
998 ///
999 /// ```
1000 /// use url::Url;
1001 /// # use url::ParseError;
1002 ///
1003 /// # fn run() -> Result<(), ParseError> {
1004 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1005 /// assert_eq!(url.password(), Some("secret123"));
1006 ///
1007 /// let url = Url::parse("ftp://:secret123@example.com")?;
1008 /// assert_eq!(url.password(), Some("secret123"));
1009 ///
1010 /// let url = Url::parse("ftp://rms@example.com")?;
1011 /// assert_eq!(url.password(), None);
1012 ///
1013 /// let url = Url::parse("https://example.com")?;
1014 /// assert_eq!(url.password(), None);
1015 /// # Ok(())
1016 /// # }
1017 /// # run().unwrap();
1018 /// ```
1019 pub fn password(&self) -> Option<&str> {
1020 // This ':' is not the one marking a port number since a host can not be empty.
1021 // (Except for file: URLs, which do not have port numbers.)
1022 if self.has_authority()
1023 && self.username_end != self.serialization.len() as u32
1024 && self.byte_at(self.username_end) == b':'
1025 {
1026 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1027 Some(self.slice(self.username_end + 1..self.host_start - 1))
1028 } else {
1029 None
1030 }
1031 }
1032
1033 /// Equivalent to `url.host().is_some()`.
1034 ///
1035 /// # Examples
1036 ///
1037 /// ```
1038 /// use url::Url;
1039 /// # use url::ParseError;
1040 ///
1041 /// # fn run() -> Result<(), ParseError> {
1042 /// let url = Url::parse("ftp://rms@example.com")?;
1043 /// assert!(url.has_host());
1044 ///
1045 /// let url = Url::parse("unix:/run/foo.socket")?;
1046 /// assert!(!url.has_host());
1047 ///
1048 /// let url = Url::parse("data:text/plain,Stuff")?;
1049 /// assert!(!url.has_host());
1050 /// # Ok(())
1051 /// # }
1052 /// # run().unwrap();
1053 /// ```
1054 pub fn has_host(&self) -> bool {
1055 !matches!(self.host, HostInternal::None)
1056 }
1057
1058 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1059 ///
1060 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1061 /// of a special URL, or percent encoded for non-special URLs.
1062 /// IPv6 addresses are given between `[` and `]` brackets.
1063 ///
1064 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1065 /// don’t have a host.
1066 ///
1067 /// See also the `host` method.
1068 ///
1069 /// # Examples
1070 ///
1071 /// ```
1072 /// use url::Url;
1073 /// # use url::ParseError;
1074 ///
1075 /// # fn run() -> Result<(), ParseError> {
1076 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1077 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1078 ///
1079 /// let url = Url::parse("ftp://rms@example.com")?;
1080 /// assert_eq!(url.host_str(), Some("example.com"));
1081 ///
1082 /// let url = Url::parse("unix:/run/foo.socket")?;
1083 /// assert_eq!(url.host_str(), None);
1084 ///
1085 /// let url = Url::parse("data:text/plain,Stuff")?;
1086 /// assert_eq!(url.host_str(), None);
1087 /// # Ok(())
1088 /// # }
1089 /// # run().unwrap();
1090 /// ```
1091 pub fn host_str(&self) -> Option<&str> {
1092 if self.has_host() {
1093 Some(self.slice(self.host_start..self.host_end))
1094 } else {
1095 None
1096 }
1097 }
1098
1099 /// Return the parsed representation of the host for this URL.
1100 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1101 /// of a special URL, or percent encoded for non-special URLs.
1102 ///
1103 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1104 /// don’t have a host.
1105 ///
1106 /// See also the `host_str` method.
1107 ///
1108 /// # Examples
1109 ///
1110 /// ```
1111 /// use url::Url;
1112 /// # use url::ParseError;
1113 ///
1114 /// # fn run() -> Result<(), ParseError> {
1115 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1116 /// assert!(url.host().is_some());
1117 ///
1118 /// let url = Url::parse("ftp://rms@example.com")?;
1119 /// assert!(url.host().is_some());
1120 ///
1121 /// let url = Url::parse("unix:/run/foo.socket")?;
1122 /// assert!(url.host().is_none());
1123 ///
1124 /// let url = Url::parse("data:text/plain,Stuff")?;
1125 /// assert!(url.host().is_none());
1126 /// # Ok(())
1127 /// # }
1128 /// # run().unwrap();
1129 /// ```
1130 pub fn host(&self) -> Option<Host<&str>> {
1131 match self.host {
1132 HostInternal::None => None,
1133 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1134 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1135 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1136 HostInternal::Scion(address) => Some(Host::Scion(address))
1137 }
1138 }
1139
1140 /// If this URL has a host and it is a domain name (not an IP address), return it.
1141 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1142 /// of a special URL, or percent encoded for non-special URLs.
1143 ///
1144 /// # Examples
1145 ///
1146 /// ```
1147 /// use url::Url;
1148 /// # use url::ParseError;
1149 ///
1150 /// # fn run() -> Result<(), ParseError> {
1151 /// let url = Url::parse("https://127.0.0.1/")?;
1152 /// assert_eq!(url.domain(), None);
1153 ///
1154 /// let url = Url::parse("mailto:rms@example.net")?;
1155 /// assert_eq!(url.domain(), None);
1156 ///
1157 /// let url = Url::parse("https://example.com/")?;
1158 /// assert_eq!(url.domain(), Some("example.com"));
1159 /// # Ok(())
1160 /// # }
1161 /// # run().unwrap();
1162 /// ```
1163 pub fn domain(&self) -> Option<&str> {
1164 match self.host {
1165 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1166 _ => None,
1167 }
1168 }
1169
1170 /// Return the port number for this URL, if any.
1171 ///
1172 /// Note that default port numbers are never reflected by the serialization,
1173 /// use the `port_or_known_default()` method if you want a default port number returned.
1174 ///
1175 /// # Examples
1176 ///
1177 /// ```
1178 /// use url::Url;
1179 /// # use url::ParseError;
1180 ///
1181 /// # fn run() -> Result<(), ParseError> {
1182 /// let url = Url::parse("https://example.com")?;
1183 /// assert_eq!(url.port(), None);
1184 ///
1185 /// let url = Url::parse("https://example.com:443/")?;
1186 /// assert_eq!(url.port(), None);
1187 ///
1188 /// let url = Url::parse("ssh://example.com:22")?;
1189 /// assert_eq!(url.port(), Some(22));
1190 /// # Ok(())
1191 /// # }
1192 /// # run().unwrap();
1193 /// ```
1194 #[inline]
1195 pub fn port(&self) -> Option<u16> {
1196 self.port
1197 }
1198
1199 /// Return the port number for this URL, or the default port number if it is known.
1200 ///
1201 /// This method only knows the default port number
1202 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1203 ///
1204 /// For URLs in these schemes, this method always returns `Some(_)`.
1205 /// For other schemes, it is the same as `Url::port()`.
1206 ///
1207 /// # Examples
1208 ///
1209 /// ```
1210 /// use url::Url;
1211 /// # use url::ParseError;
1212 ///
1213 /// # fn run() -> Result<(), ParseError> {
1214 /// let url = Url::parse("foo://example.com")?;
1215 /// assert_eq!(url.port_or_known_default(), None);
1216 ///
1217 /// let url = Url::parse("foo://example.com:1456")?;
1218 /// assert_eq!(url.port_or_known_default(), Some(1456));
1219 ///
1220 /// let url = Url::parse("https://example.com")?;
1221 /// assert_eq!(url.port_or_known_default(), Some(443));
1222 /// # Ok(())
1223 /// # }
1224 /// # run().unwrap();
1225 /// ```
1226 #[inline]
1227 pub fn port_or_known_default(&self) -> Option<u16> {
1228 self.port.or_else(|| parser::default_port(self.scheme()))
1229 }
1230
1231 /// Resolve a URL’s host and port number to `SocketAddr`.
1232 ///
1233 /// If the URL has the default port number of a scheme that is unknown to this library,
1234 /// `default_port_number` provides an opportunity to provide the actual port number.
1235 /// In non-example code this should be implemented either simply as `|| None`,
1236 /// or by matching on the URL’s `.scheme()`.
1237 ///
1238 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1239 ///
1240 /// # Examples
1241 ///
1242 /// ```no_run
1243 /// let url = url::Url::parse("https://example.net/").unwrap();
1244 /// let addrs = url.socket_addrs(|| None).unwrap();
1245 /// std::net::TcpStream::connect(&*addrs)
1246 /// # ;
1247 /// ```
1248 ///
1249 /// ```
1250 /// /// With application-specific known default port numbers
1251 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<scionnet::SocketAddr>> {
1252 /// url.socket_addrs(|| match url.scheme() {
1253 /// "socks5" | "socks5h" => Some(1080),
1254 /// _ => None,
1255 /// })
1256 /// }
1257 /// ```
1258 pub fn socket_addrs(
1259 &self,
1260 default_port_number: impl Fn() -> Option<u16>,
1261 ) -> io::Result<Vec<SocketAddr>> {
1262 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1263 // causes borrowck issues because the return value borrows `default_port_number`:
1264 //
1265 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1266 //
1267 // > This RFC proposes that *all* type parameters are considered in scope
1268 // > for `impl Trait` in return position
1269
1270 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1271 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1272 }
1273
1274 let host = io_result(self.host(), "No host name in the URL")?;
1275 let port = io_result(
1276 self.port_or_known_default().or_else(default_port_number),
1277 "No port number in the URL",
1278 )?;
1279 Ok(match host {
1280 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1281 Host::Ipv4(ip) => vec![(ip, port).into()],
1282 Host::Ipv6(ip) => vec![(ip, port).into()],
1283 Host::Scion(addr) => vec![(addr, port).into()],
1284 })
1285 }
1286
1287 /// Return the path for this URL, as a percent-encoded ASCII string.
1288 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1289 /// For other URLs, this starts with a '/' slash
1290 /// and continues with slash-separated path segments.
1291 ///
1292 /// # Examples
1293 ///
1294 /// ```rust
1295 /// use url::{Url, ParseError};
1296 ///
1297 /// # fn run() -> Result<(), ParseError> {
1298 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1299 /// assert_eq!(url.path(), "/api/versions");
1300 ///
1301 /// let url = Url::parse("https://example.com")?;
1302 /// assert_eq!(url.path(), "/");
1303 ///
1304 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1305 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1306 /// # Ok(())
1307 /// # }
1308 /// # run().unwrap();
1309 /// ```
1310 pub fn path(&self) -> &str {
1311 match (self.query_start, self.fragment_start) {
1312 (None, None) => self.slice(self.path_start..),
1313 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1314 self.slice(self.path_start..next_component_start)
1315 }
1316 }
1317 }
1318
1319 /// Unless this URL is cannot-be-a-base,
1320 /// return an iterator of '/' slash-separated path segments,
1321 /// each as a percent-encoded ASCII string.
1322 ///
1323 /// Return `None` for cannot-be-a-base URLs.
1324 ///
1325 /// When `Some` is returned, the iterator always contains at least one string
1326 /// (which may be empty).
1327 ///
1328 /// # Examples
1329 ///
1330 /// ```
1331 /// use url::Url;
1332 /// # use std::error::Error;
1333 ///
1334 /// # fn run() -> Result<(), Box<dyn Error>> {
1335 /// let url = Url::parse("https://example.com/foo/bar")?;
1336 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1337 /// assert_eq!(path_segments.next(), Some("foo"));
1338 /// assert_eq!(path_segments.next(), Some("bar"));
1339 /// assert_eq!(path_segments.next(), None);
1340 ///
1341 /// let url = Url::parse("https://example.com")?;
1342 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1343 /// assert_eq!(path_segments.next(), Some(""));
1344 /// assert_eq!(path_segments.next(), None);
1345 ///
1346 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1347 /// assert!(url.path_segments().is_none());
1348 ///
1349 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1350 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1351 /// assert_eq!(path_segments.next(), Some("countries"));
1352 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1353 /// # Ok(())
1354 /// # }
1355 /// # run().unwrap();
1356 /// ```
1357 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1358 let path = self.path();
1359 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1360 }
1361
1362 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1363 ///
1364 /// # Examples
1365 ///
1366 /// ```rust
1367 /// use url::Url;
1368 /// # use url::ParseError;
1369 ///
1370 /// # fn run() -> Result<(), ParseError> {
1371 /// let url = Url::parse("https://example.com/products?page=2")?;
1372 /// let query = url.query();
1373 /// assert_eq!(query, Some("page=2"));
1374 ///
1375 /// let url = Url::parse("https://example.com/products")?;
1376 /// let query = url.query();
1377 /// assert!(query.is_none());
1378 ///
1379 /// let url = Url::parse("https://example.com/?country=español")?;
1380 /// let query = url.query();
1381 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1382 /// # Ok(())
1383 /// # }
1384 /// # run().unwrap();
1385 /// ```
1386 pub fn query(&self) -> Option<&str> {
1387 match (self.query_start, self.fragment_start) {
1388 (None, _) => None,
1389 (Some(query_start), None) => {
1390 debug_assert!(self.byte_at(query_start) == b'?');
1391 Some(self.slice(query_start + 1..))
1392 }
1393 (Some(query_start), Some(fragment_start)) => {
1394 debug_assert!(self.byte_at(query_start) == b'?');
1395 Some(self.slice(query_start + 1..fragment_start))
1396 }
1397 }
1398 }
1399
1400 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1401 /// and return an iterator of (key, value) pairs.
1402 ///
1403 /// # Examples
1404 ///
1405 /// ```rust
1406 /// use std::borrow::Cow;
1407 ///
1408 /// use url::Url;
1409 /// # use url::ParseError;
1410 ///
1411 /// # fn run() -> Result<(), ParseError> {
1412 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1413 /// let mut pairs = url.query_pairs();
1414 ///
1415 /// assert_eq!(pairs.count(), 2);
1416 ///
1417 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1418 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1419 /// # Ok(())
1420 /// # }
1421 /// # run().unwrap();
1422 /// ```
1423
1424 #[inline]
1425 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1426 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1427 }
1428
1429 /// Return this URL’s fragment identifier, if any.
1430 ///
1431 /// A fragment is the part of the URL after the `#` symbol.
1432 /// The fragment is optional and, if present, contains a fragment identifier
1433 /// that identifies a secondary resource, such as a section heading
1434 /// of a document.
1435 ///
1436 /// In HTML, the fragment identifier is usually the id attribute of an element
1437 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1438 /// of a URL to the server.
1439 ///
1440 /// **Note:** the parser did *not* percent-encode this component,
1441 /// but the input may have been percent-encoded already.
1442 ///
1443 /// # Examples
1444 ///
1445 /// ```rust
1446 /// use url::Url;
1447 /// # use url::ParseError;
1448 ///
1449 /// # fn run() -> Result<(), ParseError> {
1450 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1451 ///
1452 /// assert_eq!(url.fragment(), Some("row=4"));
1453 ///
1454 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1455 ///
1456 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1457 /// # Ok(())
1458 /// # }
1459 /// # run().unwrap();
1460 /// ```
1461 pub fn fragment(&self) -> Option<&str> {
1462 self.fragment_start.map(|start| {
1463 debug_assert!(self.byte_at(start) == b'#');
1464 self.slice(start + 1..)
1465 })
1466 }
1467
1468 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1469 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1470 let result = f(&mut parser);
1471 self.serialization = parser.serialization;
1472 result
1473 }
1474
1475 /// Change this URL’s fragment identifier.
1476 ///
1477 /// # Examples
1478 ///
1479 /// ```rust
1480 /// use url::Url;
1481 /// # use url::ParseError;
1482 ///
1483 /// # fn run() -> Result<(), ParseError> {
1484 /// let mut url = Url::parse("https://example.com/data.csv")?;
1485 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1486 ///
1487 /// url.set_fragment(Some("cell=4,1-6,2"));
1488 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1489 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1490 ///
1491 /// url.set_fragment(None);
1492 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1493 /// assert!(url.fragment().is_none());
1494 /// # Ok(())
1495 /// # }
1496 /// # run().unwrap();
1497 /// ```
1498 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1499 // Remove any previous fragment
1500 if let Some(start) = self.fragment_start {
1501 debug_assert!(self.byte_at(start) == b'#');
1502 self.serialization.truncate(start as usize);
1503 }
1504 // Write the new one
1505 if let Some(input) = fragment {
1506 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1507 self.serialization.push('#');
1508 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1509 } else {
1510 self.fragment_start = None;
1511 self.strip_trailing_spaces_from_opaque_path();
1512 }
1513 }
1514
1515 fn take_fragment(&mut self) -> Option<String> {
1516 self.fragment_start.take().map(|start| {
1517 debug_assert!(self.byte_at(start) == b'#');
1518 let fragment = self.slice(start + 1..).to_owned();
1519 self.serialization.truncate(start as usize);
1520 fragment
1521 })
1522 }
1523
1524 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1525 if let Some(ref fragment) = fragment {
1526 assert!(self.fragment_start.is_none());
1527 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1528 self.serialization.push('#');
1529 self.serialization.push_str(fragment);
1530 }
1531 }
1532
1533 /// Change this URL’s query string. If `query` is `None`, this URL's
1534 /// query string will be cleared.
1535 ///
1536 /// # Examples
1537 ///
1538 /// ```rust
1539 /// use url::Url;
1540 /// # use url::ParseError;
1541 ///
1542 /// # fn run() -> Result<(), ParseError> {
1543 /// let mut url = Url::parse("https://example.com/products")?;
1544 /// assert_eq!(url.as_str(), "https://example.com/products");
1545 ///
1546 /// url.set_query(Some("page=2"));
1547 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1548 /// assert_eq!(url.query(), Some("page=2"));
1549 /// # Ok(())
1550 /// # }
1551 /// # run().unwrap();
1552 /// ```
1553 pub fn set_query(&mut self, query: Option<&str>) {
1554 let fragment = self.take_fragment();
1555
1556 // Remove any previous query
1557 if let Some(start) = self.query_start.take() {
1558 debug_assert!(self.byte_at(start) == b'?');
1559 self.serialization.truncate(start as usize);
1560 }
1561 // Write the new query, if any
1562 if let Some(input) = query {
1563 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1564 self.serialization.push('?');
1565 let scheme_type = SchemeType::from(self.scheme());
1566 let scheme_end = self.scheme_end;
1567 self.mutate(|parser| {
1568 let vfn = parser.violation_fn;
1569 parser.parse_query(
1570 scheme_type,
1571 scheme_end,
1572 parser::Input::new_trim_tab_and_newlines(input, vfn),
1573 )
1574 });
1575 } else {
1576 self.query_start = None;
1577 if fragment.is_none() {
1578 self.strip_trailing_spaces_from_opaque_path();
1579 }
1580 }
1581
1582 self.restore_already_parsed_fragment(fragment);
1583 }
1584
1585 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1586 /// in `application/x-www-form-urlencoded` syntax.
1587 ///
1588 /// The return value has a method-chaining API:
1589 ///
1590 /// ```rust
1591 /// # use url::{Url, ParseError};
1592 ///
1593 /// # fn run() -> Result<(), ParseError> {
1594 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1595 /// assert_eq!(url.query(), Some("lang=fr"));
1596 ///
1597 /// url.query_pairs_mut().append_pair("foo", "bar");
1598 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1599 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1600 ///
1601 /// url.query_pairs_mut()
1602 /// .clear()
1603 /// .append_pair("foo", "bar & baz")
1604 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1605 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1606 /// assert_eq!(url.as_str(),
1607 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1608 /// # Ok(())
1609 /// # }
1610 /// # run().unwrap();
1611 /// ```
1612 ///
1613 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1614 /// not `url.set_query(None)`.
1615 ///
1616 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1617 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1618 let fragment = self.take_fragment();
1619
1620 let query_start;
1621 if let Some(start) = self.query_start {
1622 debug_assert!(self.byte_at(start) == b'?');
1623 query_start = start as usize;
1624 } else {
1625 query_start = self.serialization.len();
1626 self.query_start = Some(to_u32(query_start).unwrap());
1627 self.serialization.push('?');
1628 }
1629
1630 let query = UrlQuery {
1631 url: Some(self),
1632 fragment,
1633 };
1634 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1635 }
1636
1637 fn take_after_path(&mut self) -> String {
1638 match (self.query_start, self.fragment_start) {
1639 (Some(i), _) | (None, Some(i)) => {
1640 let after_path = self.slice(i..).to_owned();
1641 self.serialization.truncate(i as usize);
1642 after_path
1643 }
1644 (None, None) => String::new(),
1645 }
1646 }
1647
1648 /// Change this URL’s path.
1649 ///
1650 /// # Examples
1651 ///
1652 /// ```rust
1653 /// use url::Url;
1654 /// # use url::ParseError;
1655 ///
1656 /// # fn run() -> Result<(), ParseError> {
1657 /// let mut url = Url::parse("https://example.com")?;
1658 /// url.set_path("api/comments");
1659 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1660 /// assert_eq!(url.path(), "/api/comments");
1661 ///
1662 /// let mut url = Url::parse("https://example.com/api")?;
1663 /// url.set_path("data/report.csv");
1664 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1665 /// assert_eq!(url.path(), "/data/report.csv");
1666 ///
1667 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1668 /// let mut url = Url::parse("https://example.com")?;
1669 /// url.set_path("api/some comments");
1670 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1671 /// assert_eq!(url.path(), "/api/some%20comments");
1672 ///
1673 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1674 /// let mut url = Url::parse("https://example.com")?;
1675 /// url.set_path("api/some%20comments");
1676 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1677 /// assert_eq!(url.path(), "/api/some%20comments");
1678 ///
1679 /// # Ok(())
1680 /// # }
1681 /// # run().unwrap();
1682 /// ```
1683 pub fn set_path(&mut self, mut path: &str) {
1684 let after_path = self.take_after_path();
1685 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1686 let cannot_be_a_base = self.cannot_be_a_base();
1687 let scheme_type = SchemeType::from(self.scheme());
1688 self.serialization.truncate(self.path_start as usize);
1689 self.mutate(|parser| {
1690 if cannot_be_a_base {
1691 if path.starts_with('/') {
1692 parser.serialization.push_str("%2F");
1693 path = &path[1..];
1694 }
1695 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1696 } else {
1697 let mut has_host = true; // FIXME
1698 parser.parse_path_start(
1699 scheme_type,
1700 &mut has_host,
1701 parser::Input::new_no_trim(path),
1702 );
1703 }
1704 });
1705 self.restore_after_path(old_after_path_pos, &after_path);
1706 }
1707
1708 /// Return an object with methods to manipulate this URL’s path segments.
1709 ///
1710 /// Return `Err(())` if this URL is cannot-be-a-base.
1711 #[allow(clippy::result_unit_err)]
1712 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1713 if self.cannot_be_a_base() {
1714 Err(())
1715 } else {
1716 Ok(path_segments::new(self))
1717 }
1718 }
1719
1720 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1721 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1722 let adjust = |index: &mut u32| {
1723 *index -= old_after_path_position;
1724 *index += new_after_path_position;
1725 };
1726 if let Some(ref mut index) = self.query_start {
1727 adjust(index)
1728 }
1729 if let Some(ref mut index) = self.fragment_start {
1730 adjust(index)
1731 }
1732 self.serialization.push_str(after_path)
1733 }
1734
1735 /// Change this URL’s port number.
1736 ///
1737 /// Note that default port numbers are not reflected in the serialization.
1738 ///
1739 /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1740 /// do nothing and return `Err`.
1741 ///
1742 /// # Examples
1743 ///
1744 /// ```
1745 /// use url::Url;
1746 /// # use std::error::Error;
1747 ///
1748 /// # fn run() -> Result<(), Box<dyn Error>> {
1749 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1750 ///
1751 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1752 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1753 ///
1754 /// url.set_port(None).map_err(|_| "cannot be base")?;
1755 /// assert_eq!(url.as_str(), "ssh://example.net/");
1756 /// # Ok(())
1757 /// # }
1758 /// # run().unwrap();
1759 /// ```
1760 ///
1761 /// Known default port numbers are not reflected:
1762 ///
1763 /// ```rust
1764 /// use url::Url;
1765 /// # use std::error::Error;
1766 ///
1767 /// # fn run() -> Result<(), Box<dyn Error>> {
1768 /// let mut url = Url::parse("https://example.org/")?;
1769 ///
1770 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1771 /// assert!(url.port().is_none());
1772 /// # Ok(())
1773 /// # }
1774 /// # run().unwrap();
1775 /// ```
1776 ///
1777 /// Cannot set port for cannot-be-a-base URLs:
1778 ///
1779 /// ```
1780 /// use url::Url;
1781 /// # use url::ParseError;
1782 ///
1783 /// # fn run() -> Result<(), ParseError> {
1784 /// let mut url = Url::parse("mailto:rms@example.net")?;
1785 ///
1786 /// let result = url.set_port(Some(80));
1787 /// assert!(result.is_err());
1788 ///
1789 /// let result = url.set_port(None);
1790 /// assert!(result.is_err());
1791 /// # Ok(())
1792 /// # }
1793 /// # run().unwrap();
1794 /// ```
1795 #[allow(clippy::result_unit_err)]
1796 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1797 // has_host implies !cannot_be_a_base
1798 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1799 return Err(());
1800 }
1801 if port.is_some() && port == parser::default_port(self.scheme()) {
1802 port = None
1803 }
1804 self.set_port_internal(port);
1805 Ok(())
1806 }
1807
1808 fn set_port_internal(&mut self, port: Option<u16>) {
1809 match (self.port, port) {
1810 (None, None) => {}
1811 (Some(_), None) => {
1812 self.serialization
1813 .drain(self.host_end as usize..self.path_start as usize);
1814 let offset = self.path_start - self.host_end;
1815 self.path_start = self.host_end;
1816 if let Some(ref mut index) = self.query_start {
1817 *index -= offset
1818 }
1819 if let Some(ref mut index) = self.fragment_start {
1820 *index -= offset
1821 }
1822 }
1823 (Some(old), Some(new)) if old == new => {}
1824 (_, Some(new)) => {
1825 let path_and_after = self.slice(self.path_start..).to_owned();
1826 self.serialization.truncate(self.host_end as usize);
1827 write!(&mut self.serialization, ":{}", new).unwrap();
1828 let old_path_start = self.path_start;
1829 let new_path_start = to_u32(self.serialization.len()).unwrap();
1830 self.path_start = new_path_start;
1831 let adjust = |index: &mut u32| {
1832 *index -= old_path_start;
1833 *index += new_path_start;
1834 };
1835 if let Some(ref mut index) = self.query_start {
1836 adjust(index)
1837 }
1838 if let Some(ref mut index) = self.fragment_start {
1839 adjust(index)
1840 }
1841 self.serialization.push_str(&path_and_after);
1842 }
1843 }
1844 self.port = port;
1845 }
1846
1847 /// Change this URL’s host.
1848 ///
1849 /// Removing the host (calling this with `None`)
1850 /// will also remove any username, password, and port number.
1851 ///
1852 /// # Examples
1853 ///
1854 /// Change host:
1855 ///
1856 /// ```
1857 /// use url::Url;
1858 /// # use url::ParseError;
1859 ///
1860 /// # fn run() -> Result<(), ParseError> {
1861 /// let mut url = Url::parse("https://example.net")?;
1862 /// let result = url.set_host(Some("rust-lang.org"));
1863 /// assert!(result.is_ok());
1864 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1865 /// # Ok(())
1866 /// # }
1867 /// # run().unwrap();
1868 /// ```
1869 ///
1870 /// Remove host:
1871 ///
1872 /// ```
1873 /// use url::Url;
1874 /// # use url::ParseError;
1875 ///
1876 /// # fn run() -> Result<(), ParseError> {
1877 /// let mut url = Url::parse("foo://example.net")?;
1878 /// let result = url.set_host(None);
1879 /// assert!(result.is_ok());
1880 /// assert_eq!(url.as_str(), "foo:/");
1881 /// # Ok(())
1882 /// # }
1883 /// # run().unwrap();
1884 /// ```
1885 ///
1886 /// Cannot remove host for 'special' schemes (e.g. `http`):
1887 ///
1888 /// ```
1889 /// use url::Url;
1890 /// # use url::ParseError;
1891 ///
1892 /// # fn run() -> Result<(), ParseError> {
1893 /// let mut url = Url::parse("https://example.net")?;
1894 /// let result = url.set_host(None);
1895 /// assert!(result.is_err());
1896 /// assert_eq!(url.as_str(), "https://example.net/");
1897 /// # Ok(())
1898 /// # }
1899 /// # run().unwrap();
1900 /// ```
1901 ///
1902 /// Cannot change or remove host for cannot-be-a-base URLs:
1903 ///
1904 /// ```
1905 /// use url::Url;
1906 /// # use url::ParseError;
1907 ///
1908 /// # fn run() -> Result<(), ParseError> {
1909 /// let mut url = Url::parse("mailto:rms@example.net")?;
1910 ///
1911 /// let result = url.set_host(Some("rust-lang.org"));
1912 /// assert!(result.is_err());
1913 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1914 ///
1915 /// let result = url.set_host(None);
1916 /// assert!(result.is_err());
1917 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1918 /// # Ok(())
1919 /// # }
1920 /// # run().unwrap();
1921 /// ```
1922 ///
1923 /// # Errors
1924 ///
1925 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1926 /// a [`ParseError`] variant will be returned.
1927 ///
1928 /// [`ParseError`]: enum.ParseError.html
1929 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1930 if self.cannot_be_a_base() {
1931 return Err(ParseError::SetHostOnCannotBeABaseUrl);
1932 }
1933
1934 let scheme_type = SchemeType::from(self.scheme());
1935
1936 if let Some(host) = host {
1937 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1938 return Err(ParseError::EmptyHost);
1939 }
1940 let mut host_substr = host;
1941 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1942 if !host.starts_with('[') || !host.ends_with(']') {
1943 match host.find(':') {
1944 Some(0) => {
1945 // If buffer is the empty string, validation error, return failure.
1946 return Err(ParseError::InvalidDomainCharacter);
1947 }
1948 // Let host be the result of host parsing buffer
1949 Some(colon_index) => {
1950 host_substr = &host[..colon_index];
1951 }
1952 None => {}
1953 }
1954 }
1955 if SchemeType::from(self.scheme()).is_special() {
1956 self.set_host_internal(Host::parse(host_substr)?, None);
1957 } else {
1958 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1959 }
1960 } else if self.has_host() {
1961 if scheme_type.is_special() && !scheme_type.is_file() {
1962 return Err(ParseError::EmptyHost);
1963 } else if self.serialization.len() == self.path_start as usize {
1964 self.serialization.push('/');
1965 }
1966 debug_assert!(self.byte_at(self.scheme_end) == b':');
1967 debug_assert!(self.byte_at(self.path_start) == b'/');
1968
1969 let new_path_start = if scheme_type.is_file() {
1970 self.scheme_end + 3
1971 } else {
1972 self.scheme_end + 1
1973 };
1974
1975 self.serialization
1976 .drain(new_path_start as usize..self.path_start as usize);
1977 let offset = self.path_start - new_path_start;
1978 self.path_start = new_path_start;
1979 self.username_end = new_path_start;
1980 self.host_start = new_path_start;
1981 self.host_end = new_path_start;
1982 self.port = None;
1983 if let Some(ref mut index) = self.query_start {
1984 *index -= offset
1985 }
1986 if let Some(ref mut index) = self.fragment_start {
1987 *index -= offset
1988 }
1989 }
1990 Ok(())
1991 }
1992
1993 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
1994 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1995 let old_suffix_pos = if opt_new_port.is_some() {
1996 self.path_start
1997 } else {
1998 self.host_end
1999 };
2000 let suffix = self.slice(old_suffix_pos..).to_owned();
2001 self.serialization.truncate(self.host_start as usize);
2002 if !self.has_authority() {
2003 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2004 debug_assert!(self.username_end == self.host_start);
2005 self.serialization.push('/');
2006 self.serialization.push('/');
2007 self.username_end += 2;
2008 self.host_start += 2;
2009 }
2010 write!(&mut self.serialization, "{}", host).unwrap();
2011 self.host_end = to_u32(self.serialization.len()).unwrap();
2012 self.host = host.into();
2013
2014 if let Some(new_port) = opt_new_port {
2015 self.port = new_port;
2016 if let Some(port) = new_port {
2017 write!(&mut self.serialization, ":{}", port).unwrap();
2018 }
2019 }
2020 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2021 self.serialization.push_str(&suffix);
2022
2023 let adjust = |index: &mut u32| {
2024 *index -= old_suffix_pos;
2025 *index += new_suffix_pos;
2026 };
2027 adjust(&mut self.path_start);
2028 if let Some(ref mut index) = self.query_start {
2029 adjust(index)
2030 }
2031 if let Some(ref mut index) = self.fragment_start {
2032 adjust(index)
2033 }
2034 }
2035
2036 /// Change this URL’s host to the given IP address.
2037 ///
2038 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2039 ///
2040 /// Compared to `Url::set_host`, this skips the host parser.
2041 ///
2042 /// # Examples
2043 ///
2044 /// ```rust
2045 /// use url::{Url, ParseError};
2046 ///
2047 /// # fn run() -> Result<(), ParseError> {
2048 /// let mut url = Url::parse("http://example.com")?;
2049 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2050 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2051 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2052 /// # Ok(())
2053 /// # }
2054 /// # run().unwrap();
2055 /// ```
2056 ///
/// Cannot set an IP host on a `mailto:` (cannot-be-a-base) URL:
2058 ///
2059 /// ```rust
2060 /// use url::{Url, ParseError};
2061 ///
2062 /// # fn run() -> Result<(), ParseError> {
2063 /// let mut url = Url::parse("mailto:rms@example.com")?;
2064 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2065 ///
2066 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2067 /// assert!(result.is_err());
2068 /// # Ok(())
2069 /// # }
2070 /// # run().unwrap();
2071 /// ```
2072 ///
2073 #[allow(clippy::result_unit_err)]
2074 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2075 if self.cannot_be_a_base() {
2076 return Err(());
2077 }
2078
2079 let address = match address {
2080 IpAddr::V4(address) => Host::Ipv4(address),
2081 IpAddr::V6(address) => Host::Ipv6(address),
2082 };
2083 self.set_host_internal(address, None);
2084 Ok(())
2085 }
2086
2087 /// Change this URL’s password.
2088 ///
/// If this URL is cannot-be-a-base, does not have a host (or has an empty one),
/// or uses the `file` scheme, do nothing and return `Err`.
2090 ///
2091 /// # Examples
2092 ///
2093 /// ```rust
2094 /// use url::{Url, ParseError};
2095 ///
2096 /// # fn run() -> Result<(), ParseError> {
2097 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2098 /// let result = url.set_password(Some("secret_password"));
2099 /// assert!(result.is_err());
2100 ///
2101 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2102 /// let result = url.set_password(Some("secret_password"));
2103 /// assert_eq!(url.password(), Some("secret_password"));
2104 ///
2105 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2106 /// let result = url.set_password(Some("secret2"));
2107 /// assert!(result.is_ok());
2108 /// assert_eq!(url.password(), Some("secret2"));
2109 /// # Ok(())
2110 /// # }
2111 /// # run().unwrap();
2112 /// ```
#[allow(clippy::result_unit_err)]
pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
    // has_host implies !cannot_be_a_base
    // URLs with no host, an empty domain host, or the `file` scheme are rejected unchanged.
    if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
        return Err(());
    }
    // `None` and `Some("")` are treated alike: both mean "no password".
    let password = password.unwrap_or_default();
    if !password.is_empty() {
        // Set (or replace) the password: keep everything from the host onwards,
        // cut the serialization right after the username (which also drops any
        // old password), then append `:<percent-encoded password>@`.
        let host_and_after = self.slice(self.host_start..).to_owned();
        self.serialization.truncate(self.username_end as usize);
        self.serialization.push(':');
        self.serialization
            .extend(utf8_percent_encode(password, USERINFO));
        self.serialization.push('@');

        // The host now starts at the current end of the serialization;
        // every later index moves by the same amount.
        let old_host_start = self.host_start;
        let new_host_start = to_u32(self.serialization.len()).unwrap();
        let adjust = |index: &mut u32| {
            *index -= old_host_start;
            *index += new_host_start;
        };
        self.host_start = new_host_start;
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }

        self.serialization.push_str(&host_and_after);
    } else if self.byte_at(self.username_end) == b':' {
        // If there is a password to remove
        let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
        debug_assert!(has_username_or_password);
        // The username begins right after `scheme://`.
        let username_start = self.scheme_end + 3;
        let empty_username = username_start == self.username_end;
        let start = self.username_end; // Remove the ':'
        let end = if empty_username {
            self.host_start // Remove the '@' as well
        } else {
            self.host_start - 1 // Keep the '@' to separate the username from the host
        };
        self.serialization.drain(start as usize..end as usize);
        // Everything after the removed span moves left by its length.
        let offset = end - start;
        self.host_start -= offset;
        self.host_end -= offset;
        self.path_start -= offset;
        if let Some(ref mut index) = self.query_start {
            *index -= offset
        }
        if let Some(ref mut index) = self.fragment_start {
            *index -= offset
        }
    }
    // An empty password with nothing to remove is a successful no-op.
    Ok(())
}
2171
2172 /// Change this URL’s username.
2173 ///
/// If this URL is cannot-be-a-base, does not have a host (or has an empty one),
/// or uses the `file` scheme, do nothing and return `Err`.
///
/// # Examples
///
/// Cannot set a username on a `mailto:` (cannot-be-a-base) URL:
2178 ///
2179 /// ```rust
2180 /// use url::{Url, ParseError};
2181 ///
2182 /// # fn run() -> Result<(), ParseError> {
2183 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2184 /// let result = url.set_username("user1");
2185 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2186 /// assert!(result.is_err());
2187 /// # Ok(())
2188 /// # }
2189 /// # run().unwrap();
2190 /// ```
2191 ///
/// Set the username to `user1`:
2193 ///
2194 /// ```rust
2195 /// use url::{Url, ParseError};
2196 ///
2197 /// # fn run() -> Result<(), ParseError> {
2198 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2199 /// let result = url.set_username("user1");
2200 /// assert!(result.is_ok());
2201 /// assert_eq!(url.username(), "user1");
2202 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2203 /// # Ok(())
2204 /// # }
2205 /// # run().unwrap();
2206 /// ```
#[allow(clippy::result_unit_err)]
pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
    // has_host implies !cannot_be_a_base
    // URLs with no host, an empty domain host, or the `file` scheme are rejected unchanged.
    if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
        return Err(());
    }
    // The username begins right after `scheme://`.
    let username_start = self.scheme_end + 3;
    debug_assert!(self.slice(self.scheme_end..username_start) == "://");
    // Nothing to do if the stored (serialized) username already equals the input.
    if self.slice(username_start..self.username_end) == username {
        return Ok(());
    }
    // Keep everything after the old username, then write the percent-encoded new one.
    let after_username = self.slice(self.username_end..).to_owned();
    self.serialization.truncate(username_start as usize);
    self.serialization
        .extend(utf8_percent_encode(username, USERINFO));

    // `removed_bytes` starts as the old end of the username and `added_bytes`
    // as the new one; later indices are shifted by their difference.
    let mut removed_bytes = self.username_end;
    self.username_end = to_u32(self.serialization.len()).unwrap();
    let mut added_bytes = self.username_end;

    let new_username_is_empty = self.username_end == username_start;
    match (new_username_is_empty, after_username.chars().next()) {
        // The username became empty and was directly followed by '@'
        // (no password): drop the now-pointless '@' too.
        (true, Some('@')) => {
            removed_bytes += 1;
            self.serialization.push_str(&after_username[1..]);
        }
        // A separator ('@' or ':' introducing a password) is already there,
        // or the new username is empty and needs none: re-append the rest as is.
        (false, Some('@')) | (_, Some(':')) | (true, _) => {
            self.serialization.push_str(&after_username);
        }
        // A non-empty username where there was no userinfo before: insert '@'.
        (false, _) => {
            added_bytes += 1;
            self.serialization.push('@');
            self.serialization.push_str(&after_username);
        }
    }

    // Shift every index located after the username.
    let adjust = |index: &mut u32| {
        *index -= removed_bytes;
        *index += added_bytes;
    };
    adjust(&mut self.host_start);
    adjust(&mut self.host_end);
    adjust(&mut self.path_start);
    if let Some(ref mut index) = self.query_start {
        adjust(index)
    }
    if let Some(ref mut index) = self.fragment_start {
        adjust(index)
    }
    Ok(())
}
2258
2259 /// Change this URL’s scheme.
2260 ///
2261 /// Do nothing and return `Err` under the following circumstances:
2262 ///
/// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]*`
2264 /// * If this URL is cannot-be-a-base and the new scheme is one of
2265 /// `http`, `https`, `ws`, `wss` or `ftp`
2266 /// * If either the old or new scheme is `http`, `https`, `ws`,
2267 /// `wss` or `ftp` and the other is not one of these
2268 /// * If the new scheme is `file` and this URL includes credentials
2269 /// or has a non-null port
2270 /// * If this URL's scheme is `file` and its host is empty or null
2271 ///
2272 /// See also [the URL specification's section on legal scheme state
2273 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2274 ///
2275 /// # Examples
2276 ///
2277 /// Change the URL’s scheme from `https` to `http`:
2278 ///
2279 /// ```
2280 /// use url::Url;
2281 /// # use url::ParseError;
2282 ///
2283 /// # fn run() -> Result<(), ParseError> {
2284 /// let mut url = Url::parse("https://example.net")?;
2285 /// let result = url.set_scheme("http");
2286 /// assert_eq!(url.as_str(), "http://example.net/");
2287 /// assert!(result.is_ok());
2288 /// # Ok(())
2289 /// # }
2290 /// # run().unwrap();
2291 /// ```
2292 /// Change the URL’s scheme from `foo` to `bar`:
2293 ///
2294 /// ```
2295 /// use url::Url;
2296 /// # use url::ParseError;
2297 ///
2298 /// # fn run() -> Result<(), ParseError> {
2299 /// let mut url = Url::parse("foo://example.net")?;
2300 /// let result = url.set_scheme("bar");
2301 /// assert_eq!(url.as_str(), "bar://example.net");
2302 /// assert!(result.is_ok());
2303 /// # Ok(())
2304 /// # }
2305 /// # run().unwrap();
2306 /// ```
2307 ///
2308 /// Cannot change URL’s scheme from `https` to `foõ`:
2309 ///
2310 /// ```
2311 /// use url::Url;
2312 /// # use url::ParseError;
2313 ///
2314 /// # fn run() -> Result<(), ParseError> {
2315 /// let mut url = Url::parse("https://example.net")?;
2316 /// let result = url.set_scheme("foõ");
2317 /// assert_eq!(url.as_str(), "https://example.net/");
2318 /// assert!(result.is_err());
2319 /// # Ok(())
2320 /// # }
2321 /// # run().unwrap();
2322 /// ```
2323 ///
2324 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2325 ///
2326 /// ```
2327 /// use url::Url;
2328 /// # use url::ParseError;
2329 ///
2330 /// # fn run() -> Result<(), ParseError> {
2331 /// let mut url = Url::parse("mailto:rms@example.net")?;
2332 /// let result = url.set_scheme("https");
2333 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2334 /// assert!(result.is_err());
2335 /// # Ok(())
2336 /// # }
2337 /// # run().unwrap();
2338 /// ```
2339 /// Cannot change the URL’s scheme from `foo` to `https`:
2340 ///
2341 /// ```
2342 /// use url::Url;
2343 /// # use url::ParseError;
2344 ///
2345 /// # fn run() -> Result<(), ParseError> {
2346 /// let mut url = Url::parse("foo://example.net")?;
2347 /// let result = url.set_scheme("https");
2348 /// assert_eq!(url.as_str(), "foo://example.net");
2349 /// assert!(result.is_err());
2350 /// # Ok(())
2351 /// # }
2352 /// # run().unwrap();
2353 /// ```
2354 /// Cannot change the URL’s scheme from `http` to `foo`:
2355 ///
2356 /// ```
2357 /// use url::Url;
2358 /// # use url::ParseError;
2359 ///
2360 /// # fn run() -> Result<(), ParseError> {
2361 /// let mut url = Url::parse("http://example.net")?;
2362 /// let result = url.set_scheme("foo");
2363 /// assert_eq!(url.as_str(), "http://example.net/");
2364 /// assert!(result.is_err());
2365 /// # Ok(())
2366 /// # }
2367 /// # run().unwrap();
2368 /// ```
#[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
    // Parse the candidate scheme into a fresh buffer; `parser.serialization`
    // then holds the parsed scheme and `remaining` whatever input was left over.
    let mut parser = Parser::for_setter(String::new());
    let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
    let new_scheme_type = SchemeType::from(&parser.serialization);
    let old_scheme_type = SchemeType::from(self.scheme());
    // ("buffer" in the spec wording below is the new scheme.)
    // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
    if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
        // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
        (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
        // If url includes credentials or has a non-null port, and buffer is "file", then return.
        // If url’s scheme is "file" and its host is an empty host or null, then return.
        // NOTE(review): the check below uses `has_authority()`, which looks broader
        // than the spec wording quoted above — confirm this is intended.
        (new_scheme_type.is_file() && self.has_authority())
    {
        return Err(());
    }

    // Reject trailing input after the scheme, and a special scheme on a URL without a host.
    if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
        return Err(());
    }
    // Every component index after the scheme shifts by the change in scheme length.
    let old_scheme_end = self.scheme_end;
    let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
    let adjust = |index: &mut u32| {
        *index -= old_scheme_end;
        *index += new_scheme_end;
    };

    self.scheme_end = new_scheme_end;
    adjust(&mut self.username_end);
    adjust(&mut self.host_start);
    adjust(&mut self.host_end);
    adjust(&mut self.path_start);
    if let Some(ref mut index) = self.query_start {
        adjust(index)
    }
    if let Some(ref mut index) = self.fragment_start {
        adjust(index)
    }

    // New serialization = new scheme + everything that followed the old scheme.
    parser.serialization.push_str(self.slice(old_scheme_end..));
    self.serialization = parser.serialization;

    // Update the port so it can be removed
    // If it is the scheme's default
    // we don't mind it silently failing
    // if there was no port in the first place
    let previous_port = self.port();
    let _ = self.set_port(previous_port);

    Ok(())
}
2420
2421 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2422 ///
2423 /// This returns `Err` if the given path is not absolute or,
2424 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2425 ///
2426 /// # Examples
2427 ///
2428 /// On Unix-like platforms:
2429 ///
2430 /// ```
2431 /// # if cfg!(unix) {
2432 /// use url::Url;
2433 ///
2434 /// # fn run() -> Result<(), ()> {
2435 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2436 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2437 ///
2438 /// let url = Url::from_file_path("../foo.txt");
2439 /// assert!(url.is_err());
2440 ///
2441 /// let url = Url::from_file_path("https://google.com/");
2442 /// assert!(url.is_err());
2443 /// # Ok(())
2444 /// # }
2445 /// # run().unwrap();
2446 /// # }
2447 /// ```
2448 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2449 #[allow(clippy::result_unit_err)]
2450 pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2451 let mut serialization = "file://".to_owned();
2452 let host_start = serialization.len() as u32;
2453 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2454 Ok(Url {
2455 serialization,
2456 scheme_end: "file".len() as u32,
2457 username_end: host_start,
2458 host_start,
2459 host_end,
2460 host,
2461 port: None,
2462 path_start: host_end,
2463 query_start: None,
2464 fragment_start: None,
2465 })
2466 }
2467
2468 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2469 ///
2470 /// This returns `Err` if the given path is not absolute or,
2471 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2472 ///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2474 /// so that the entire path is considered when using this URL as a base URL.
2475 ///
2476 /// For example:
2477 ///
2478 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2479 /// as the base URL is `file:///var/www/index.html`
2480 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2481 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2482 ///
2483 /// Note that `std::path` does not consider trailing slashes significant
2484 /// and usually does not include them (e.g. in `Path::parent()`).
2485 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2486 #[allow(clippy::result_unit_err)]
2487 pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2488 let mut url = Url::from_file_path(path)?;
2489 if !url.serialization.ends_with('/') {
2490 url.serialization.push('/')
2491 }
2492 Ok(url)
2493 }
2494
2495 /// Serialize with Serde using the internal representation of the `Url` struct.
2496 ///
2497 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2498 /// for speed, compared to the `Deserialize` trait impl.
2499 ///
2500 /// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Destructuring first lets us ensure that adding or removing fields forces this method
    // to be updated
    // (together with `#[deny(unused)]`, a destructured field that is not
    // serialized below is reported as an error).
    let Url {
        ref serialization,
        ref scheme_end,
        ref username_end,
        ref host_start,
        ref host_end,
        ref host,
        ref port,
        ref path_start,
        ref query_start,
        ref fragment_start,
    } = *self;
    // All fields are serialized as one tuple, in this order; `deserialize_internal`
    // reads a tuple with the same field names in the same order.
    (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    )
        .serialize(serializer)
}
2536
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl: the deserialized fields are
/// only validated (with `check_invariants`) when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // Read the fields back as one tuple, in the order `serialize_internal` wrote them.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    // The struct is assembled directly from the deserialized fields, without re-parsing.
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Consistency of the fields is verified in debug builds only;
    // a violation is reported as a deserialization error.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(|reason| {
            let reason: &str = &reason;
            Error::invalid_value(Unexpected::Other("value"), &reason)
        })?
    }
    Ok(url)
}
2582
2583 /// Assuming the URL is in the `file` scheme or similar,
2584 /// convert its path to an absolute `std::path::Path`.
2585 ///
2586 /// **Note:** This does not actually check the URL’s `scheme`,
2587 /// and may give nonsensical results for other schemes.
2588 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2589 ///
2590 /// ```
2591 /// # use url::Url;
2592 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2593 /// let path = url.to_file_path();
2594 /// ```
2595 ///
2596 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2597 /// `file:` URLs may have a non-local host),
2598 /// or if `Path::new_opt()` returns `None`.
2599 /// (That is, if the percent-decoded path contains a NUL byte or,
2600 /// for a Windows path, is not UTF-8.)
2601 #[inline]
2602 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2603 #[allow(clippy::result_unit_err)]
2604 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2605 if let Some(segments) = self.path_segments() {
2606 let host = match self.host() {
2607 None | Some(Host::Domain("localhost")) => None,
2608 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2609 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2610 }
2611 _ => return Err(()),
2612 };
2613
2614 return file_url_segments_to_pathbuf(host, segments);
2615 }
2616 Err(())
2617 }
2618
2619 // Private helper methods:
2620
/// Return the part of the serialization selected by `range`.
///
/// `range` is any type implementing `RangeArg`, which performs the
/// actual slicing via `slice_of`.
#[inline]
fn slice<R>(&self, range: R) -> &str
where
    R: RangeArg,
{
    range.slice_of(&self.serialization)
}
2628
/// Return the byte at offset `i` of the serialization.
///
/// Uses plain indexing, so an out-of-range `i` panics.
#[inline]
fn byte_at(&self, i: u32) -> u8 {
    self.serialization.as_bytes()[i as usize]
}
2633}
2634
/// Parse a string as an URL, without a base URL or encoding override.
///
/// This simply forwards to `Url::parse`, which makes `"…".parse::<Url>()` available.
impl str::FromStr for Url {
    type Err = ParseError;

    #[inline]
    fn from_str(input: &str) -> Result<Url, crate::ParseError> {
        Url::parse(input)
    }
}
2644
/// Fallible conversion from a string slice; forwards to `Url::parse`.
impl<'a> TryFrom<&'a str> for Url {
    type Error = ParseError;

    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
        Url::parse(s)
    }
}
2652
/// Display the serialization of this URL.
impl fmt::Display for Url {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the string's own `Display` impl with the caller's formatter.
        fmt::Display::fmt(&self.serialization, formatter)
    }
}
2660
/// String conversion.
///
/// Consumes the URL and returns its serialization without copying it.
impl From<Url> for String {
    fn from(value: Url) -> String {
        value.serialization
    }
}
2667
/// Debug the serialization of this URL.
///
/// The output is a `Url { … }` struct listing the components as returned by
/// the public accessors, rather than the raw serialization and indices.
impl fmt::Debug for Url {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter
            .debug_struct("Url")
            .field("scheme", &self.scheme())
            .field("cannot_be_a_base", &self.cannot_be_a_base())
            .field("username", &self.username())
            .field("password", &self.password())
            .field("host", &self.host())
            .field("port", &self.port())
            .field("path", &self.path())
            .field("query", &self.query())
            .field("fragment", &self.fragment())
            .finish()
    }
}
2686
/// URLs compare like their serialization.
///
/// Marker impl: equality itself is defined by the `PartialEq` impl.
impl Eq for Url {}
2689
/// URLs compare like their serialization.
///
/// Two URLs are equal exactly when their serialized strings are equal.
impl PartialEq for Url {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.serialization == other.serialization
    }
}
2697
/// URLs compare like their serialization.
///
/// The ordering is the string ordering of the serializations.
impl Ord for Url {
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.serialization.cmp(&other.serialization)
    }
}
2705
/// URLs compare like their serialization.
///
/// Always returns `Some`, delegating to the total order defined by `Ord`.
impl PartialOrd for Url {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}
2713
/// URLs hash like their serialization.
///
/// Only the serialized string is fed to the hasher, matching the `PartialEq` impl,
/// which also compares serializations only.
impl hash::Hash for Url {
    #[inline]
    fn hash<H>(&self, state: &mut H)
    where
        H: hash::Hasher,
    {
        hash::Hash::hash(&self.serialization, state)
    }
}
2724
/// Return the serialization of this URL.
///
/// Borrows the internal string; nothing is copied.
impl AsRef<str> for Url {
    #[inline]
    fn as_ref(&self) -> &str {
        &self.serialization
    }
}
2732
/// A range of `u32` byte offsets that can be used to slice a `&str`.
///
/// Implemented for `a..b`, `a..` and `..b`. Slicing uses plain string indexing,
/// so out-of-range or non-char-boundary offsets panic.
trait RangeArg {
    /// Return the part of `s` covered by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`: both bounds given.
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`: from an offset to the end of the string.
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`: from the beginning of the string up to an offset.
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2757
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string (its serialization).
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}
2770
/// Deserializes this URL from a `serde` stream.
///
/// The input is expected to be a string, which is parsed with `Url::parse`;
/// a parse failure is reported as an "invalid value" deserialization error.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that turns a string into a parsed `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // The parse error's message becomes the "expected" part of the report.
                        let message = err.to_string();
                        Err(Error::invalid_value(
                            Unexpected::Str(s),
                            &message.as_str(),
                        ))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2805
2806#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
2807fn path_to_file_url_segments(
2808 path: &Path,
2809 serialization: &mut String,
2810) -> Result<(u32, HostInternal), ()> {
2811 #[cfg(any(unix, target_os = "redox"))]
2812 use std::os::unix::prelude::OsStrExt;
2813 #[cfg(target_os = "wasi")]
2814 use std::os::wasi::prelude::OsStrExt;
2815 if !path.is_absolute() {
2816 return Err(());
2817 }
2818 let host_end = to_u32(serialization.len()).unwrap();
2819 let mut empty = true;
2820 // skip the root component
2821 for component in path.components().skip(1) {
2822 empty = false;
2823 serialization.push('/');
2824 serialization.extend(percent_encode(
2825 component.as_os_str().as_bytes(),
2826 PATH_SEGMENT,
2827 ));
2828 }
2829 if empty {
2830 // An URL’s path must not be empty.
2831 serialization.push('/');
2832 }
2833 Ok((host_end, HostInternal::None))
2834}
2835
/// Windows flavour of `path_to_file_url_segments`: forwards to
/// `path_to_file_url_segments_windows`, which is compiled on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2843
2844// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2845#[cfg_attr(not(windows), allow(dead_code))]
2846fn path_to_file_url_segments_windows(
2847 path: &Path,
2848 serialization: &mut String,
2849) -> Result<(u32, HostInternal), ()> {
2850 use std::path::{Component, Prefix};
2851 if !path.is_absolute() {
2852 return Err(());
2853 }
2854 let mut components = path.components();
2855
2856 let host_start = serialization.len() + 1;
2857 let host_end;
2858 let host_internal;
2859
2860 match components.next() {
2861 Some(Component::Prefix(ref p)) => match p.kind() {
2862 Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2863 host_end = to_u32(serialization.len()).unwrap();
2864 host_internal = HostInternal::None;
2865 serialization.push('/');
2866 serialization.push(letter as char);
2867 serialization.push(':');
2868 }
2869 Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2870 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2871 write!(serialization, "{}", host).unwrap();
2872 host_end = to_u32(serialization.len()).unwrap();
2873 host_internal = host.into();
2874 serialization.push('/');
2875 let share = share.to_str().ok_or(())?;
2876 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2877 }
2878 _ => return Err(()),
2879 },
2880 _ => return Err(()),
2881 }
2882
2883 let mut path_only_has_prefix = true;
2884 for component in components {
2885 if component == Component::RootDir {
2886 continue;
2887 }
2888
2889 path_only_has_prefix = false;
2890 // FIXME: somehow work with non-unicode?
2891 let component = component.as_os_str().to_str().ok_or(())?;
2892
2893 serialization.push('/');
2894 serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2895 }
2896
2897 // A windows drive letter must end with a slash.
2898 if serialization.len() > host_start
2899 && parser::is_windows_drive_letter(&serialization[host_start..])
2900 && path_only_has_prefix
2901 {
2902 serialization.push('/');
2903 }
2904
2905 Ok((host_end, host_internal))
2906}
2907
2908#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
2909fn file_url_segments_to_pathbuf(
2910 host: Option<&str>,
2911 segments: str::Split<'_, char>,
2912) -> Result<PathBuf, ()> {
2913 use std::ffi::OsStr;
2914 #[cfg(any(unix, target_os = "redox"))]
2915 use std::os::unix::prelude::OsStrExt;
2916 #[cfg(target_os = "wasi")]
2917 use std::os::wasi::prelude::OsStrExt;
2918
2919 if host.is_some() {
2920 return Err(());
2921 }
2922
2923 let mut bytes = if cfg!(target_os = "redox") {
2924 b"file:".to_vec()
2925 } else {
2926 Vec::new()
2927 };
2928
2929 for segment in segments {
2930 bytes.push(b'/');
2931 bytes.extend(percent_decode(segment.as_bytes()));
2932 }
2933
2934 // A windows drive letter must end with a slash.
2935 if bytes.len() > 2
2936 && bytes[bytes.len() - 2].is_ascii_alphabetic()
2937 && matches!(bytes[bytes.len() - 1], b':' | b'|')
2938 {
2939 bytes.push(b'/');
2940 }
2941
2942 let os_str = OsStr::from_bytes(&bytes);
2943 let path = PathBuf::from(os_str);
2944
2945 debug_assert!(
2946 path.is_absolute(),
2947 "to_file_path() failed to produce an absolute Path"
2948 );
2949
2950 Ok(path)
2951}
2952
/// Windows flavour of `file_url_segments_to_pathbuf`: forwards to
/// `file_url_segments_to_pathbuf_windows`, which is compiled on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2960
2961// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2962#[cfg_attr(not(windows), allow(dead_code))]
2963fn file_url_segments_to_pathbuf_windows(
2964 host: Option<&str>,
2965 mut segments: str::Split<'_, char>,
2966) -> Result<PathBuf, ()> {
2967 let mut string = if let Some(host) = host {
2968 r"\\".to_owned() + host
2969 } else {
2970 let first = segments.next().ok_or(())?;
2971
2972 match first.len() {
2973 2 => {
2974 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2975 return Err(());
2976 }
2977
2978 first.to_owned()
2979 }
2980
2981 4 => {
2982 if !first.starts_with(parser::ascii_alpha) {
2983 return Err(());
2984 }
2985 let bytes = first.as_bytes();
2986 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2987 return Err(());
2988 }
2989
2990 first[0..1].to_owned() + ":"
2991 }
2992
2993 _ => return Err(()),
2994 }
2995 };
2996
2997 for segment in segments {
2998 string.push('\\');
2999
3000 // Currently non-unicode windows paths cannot be represented
3001 match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
3002 Ok(s) => string.push_str(&s),
3003 Err(..) => return Err(()),
3004 }
3005 }
3006 let path = PathBuf::from(string);
3007 debug_assert!(
3008 path.is_absolute(),
3009 "to_file_path() failed to produce an absolute Path"
3010 );
3011 Ok(path)
3012}
3013
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. It is an `Option` so that `finish` and `drop` can
    // `take()` it; it is unwrapped whenever the target string is accessed.
    url: Option<&'a mut Url>,
    // Fragment handed back to `Url::restore_already_parsed_fragment` once
    // editing is finished or the value is dropped.
    fragment: Option<String>,
}
3020
// `as_mut_string` here exposes the internal serialization of an `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
//   (and so can not be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
impl<'a> form_urlencoded::Target for UrlQuery<'a> {
    // Hand out the URL's serialization as the string being written to.
    fn as_mut_string(&mut self) -> &mut String {
        &mut self.url.as_mut().unwrap().serialization
    }

    // Take the URL out (so that `Drop` finds nothing left to do), restore the
    // saved fragment on it and return the URL itself.
    fn finish(mut self) -> &'a mut Url {
        let url = self.url.take().unwrap();
        url.restore_already_parsed_fragment(self.fragment.take());
        url
    }

    type Finished = &'a mut Url;
}
3044
/// Restores the saved fragment if the `UrlQuery` is dropped without `finish`
/// having been called (after `finish`, `url` is already `None`).
impl<'a> Drop for UrlQuery<'a> {
    fn drop(&mut self) {
        if let Some(url) = self.url.take() {
            url.restore_already_parsed_fragment(self.fragment.take())
        }
    }
}