url_fork/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use alloc::borrow::ToOwned;
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12use core::cmp;
13use core::fmt::{self, Formatter};
14
15use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
16#[cfg(feature = "serde")]
17use serde::{Deserialize, Serialize};
18
19use crate::net::{Ipv4Addr, Ipv6Addr};
20use crate::parser::{ParseError, ParseResult};
21
/// Crate-internal, `Copy` classification of a host.
///
/// Unlike [`Host`], the `Domain` variant carries no string; only the kind of
/// host (and, for IP hosts, the address) is recorded.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// Produced when converting a `Host::Domain` whose string is empty.
    None,
    /// A non-empty domain; the domain text itself is not stored here.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
30
31impl From<Host<String>> for HostInternal {
32    fn from(host: Host<String>) -> HostInternal {
33        match host {
34            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
35            Host::Domain(_) => HostInternal::Domain,
36            Host::Ipv4(address) => HostInternal::Ipv4(address),
37            Host::Ipv6(address) => HostInternal::Ipv6(address),
38        }
39    }
40}
41
/// The host name of a URL.
///
/// `S` is the string type that holds a domain name; it defaults to `String`.
/// `PartialEq` is not derived: it is implemented by hand elsewhere in this
/// file so that hosts with different string types can be compared.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
64
65impl<'a> Host<&'a str> {
66    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
67    pub fn to_owned(&self) -> Host<String> {
68        match *self {
69            Host::Domain(domain) => Host::Domain(domain.to_owned()),
70            Host::Ipv4(address) => Host::Ipv4(address),
71            Host::Ipv6(address) => Host::Ipv6(address),
72        }
73    }
74}
75
76impl Host<String> {
77    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
78    ///
79    /// <https://url.spec.whatwg.org/#host-parsing>
80    pub fn parse(input: &str) -> Result<Self, ParseError> {
81        if input.starts_with('[') {
82            if !input.ends_with(']') {
83                return Err(ParseError::InvalidIpv6Address);
84            }
85            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
86        }
87        let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
88
89        let domain = Self::domain_to_ascii(&domain)?;
90
91        if domain.is_empty() {
92            return Err(ParseError::EmptyHost);
93        }
94
95        let is_invalid_domain_char = |c| {
96            matches!(
97                c,
98                '\0'..='\u{001F}'
99                    | ' '
100                    | '#'
101                    | '%'
102                    | '/'
103                    | ':'
104                    | '<'
105                    | '>'
106                    | '?'
107                    | '@'
108                    | '['
109                    | '\\'
110                    | ']'
111                    | '^'
112                    | '\u{007F}'
113                    | '|'
114            )
115        };
116
117        if domain.find(is_invalid_domain_char).is_some() {
118            Err(ParseError::InvalidDomainCharacter)
119        } else if ends_in_a_number(&domain) {
120            let address = parse_ipv4addr(&domain)?;
121            Ok(Host::Ipv4(address))
122        } else {
123            Ok(Host::Domain(domain))
124        }
125    }
126
127    // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
128    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
129        if input.starts_with('[') {
130            if !input.ends_with(']') {
131                return Err(ParseError::InvalidIpv6Address);
132            }
133            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
134        }
135
136        let is_invalid_host_char = |c| {
137            matches!(
138                c,
139                '\0' | '\t'
140                    | '\n'
141                    | '\r'
142                    | ' '
143                    | '#'
144                    | '/'
145                    | ':'
146                    | '<'
147                    | '>'
148                    | '?'
149                    | '@'
150                    | '['
151                    | '\\'
152                    | ']'
153                    | '^'
154                    | '|'
155            )
156        };
157
158        if input.find(is_invalid_host_char).is_some() {
159            Err(ParseError::InvalidDomainCharacter)
160        } else {
161            Ok(Host::Domain(
162                utf8_percent_encode(input, CONTROLS).to_string(),
163            ))
164        }
165    }
166
167    /// convert domain with idna
168    fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
169        idna::domain_to_ascii(domain).map_err(Into::into)
170    }
171}
172
173impl<S: AsRef<str>> fmt::Display for Host<S> {
174    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
175        match *self {
176            Host::Domain(ref domain) => domain.as_ref().fmt(f),
177            Host::Ipv4(ref addr) => addr.fmt(f),
178            Host::Ipv6(ref addr) => {
179                f.write_str("[")?;
180                write_ipv6(addr, f)?;
181                f.write_str("]")
182            }
183        }
184    }
185}
186
187impl<S, T> PartialEq<Host<T>> for Host<S>
188where
189    S: PartialEq<T>,
190{
191    fn eq(&self, other: &Host<T>) -> bool {
192        match (self, other) {
193            (Host::Domain(a), Host::Domain(b)) => a == b,
194            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
195            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
196            (_, _) => false,
197        }
198    }
199}
200
201fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
202    let segments = addr.segments();
203    let (compress_start, compress_end) = longest_zero_sequence(&segments);
204    let mut i = 0;
205    while i < 8 {
206        if i == compress_start {
207            f.write_str(":")?;
208            if i == 0 {
209                f.write_str(":")?;
210            }
211            if compress_end < 8 {
212                i = compress_end;
213            } else {
214                break;
215            }
216        }
217        write!(f, "{:x}", segments[i as usize])?;
218        if i < 7 {
219            f.write_str(":")?;
220        }
221        i += 1;
222    }
223    Ok(())
224}
225
/// Finds the first longest run of consecutive zero groups in `pieces`.
///
/// Returns `(start, end)` with `end` exclusive. Runs shorter than two groups
/// are ignored; when no run qualifies the result is `(-1, -2)`.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-serializer> steps 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;
    let mut run_len: isize = 0;

    for (idx, &piece) in pieces.iter().enumerate() {
        if piece != 0 {
            run_len = 0;
            continue;
        }
        run_len += 1;
        // Strictly greater: an equally long later run never replaces an earlier one.
        if run_len > best_len {
            best_len = run_len;
            best_start = idx as isize + 1 - run_len;
        }
    }

    // Step 3: a lone zero is not worth compressing.
    if best_len >= 2 {
        (best_start, best_start + best_len)
    } else {
        (-1, -2)
    }
}
261
262/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
263fn ends_in_a_number(input: &str) -> bool {
264    let mut parts = input.rsplit('.');
265    let last = parts.next().unwrap();
266    let last = if last.is_empty() {
267        if let Some(last) = parts.next() {
268            last
269        } else {
270            return false;
271        }
272    } else {
273        last
274    };
275    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
276        return true;
277    }
278
279    parse_ipv4number(last).is_ok()
280}
281
/// Parses one part of an IPv4 address as decimal, octal (leading `0`), or
/// hexadecimal (leading `0x` / `0X`).
///
/// Returns `Err(())` for empty input or input with a digit that is invalid
/// for the detected radix. Returns `Ok(None)` when the digits are valid but
/// the value does not fit in a `u32`. A bare prefix with no digits after it
/// (such as `0x`) is `Ok(Some(0))`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let (radix, digits) = if input.starts_with("0x") || input.starts_with("0X") {
        (16, &input[2..])
    } else if input.len() >= 2 && input.starts_with('0') {
        (8, &input[1..])
    } else {
        (10, input)
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }
    if !digits.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character is a valid digit, so the only possible failure is overflow.
    Ok(u32::from_str_radix(digits, radix).ok())
}
318
319/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
320fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
321    let mut parts: Vec<&str> = input.split('.').collect();
322    if parts.last() == Some(&"") {
323        parts.pop();
324    }
325    if parts.len() > 4 {
326        return Err(ParseError::InvalidIpv4Address);
327    }
328    let mut numbers: Vec<u32> = Vec::new();
329    for part in parts {
330        match parse_ipv4number(part) {
331            Ok(Some(n)) => numbers.push(n),
332            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
333            Err(()) => return Err(ParseError::InvalidIpv4Address),
334        };
335    }
336    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
337    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
338    if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
339        return Err(ParseError::InvalidIpv4Address);
340    }
341    if numbers.iter().any(|x| *x > 255) {
342        return Err(ParseError::InvalidIpv4Address);
343    }
344    for (counter, n) in numbers.iter().enumerate() {
345        ipv4 += n << (8 * (3 - counter as u32))
346    }
347    Ok(Ipv4Addr::from(ipv4))
348}
349
350/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
351fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
352    let input = input.as_bytes();
353    let len = input.len();
354    let mut is_ip_v4 = false;
355    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
356    let mut piece_pointer = 0;
357    let mut compress_pointer = None;
358    let mut i = 0;
359
360    if len < 2 {
361        return Err(ParseError::InvalidIpv6Address);
362    }
363
364    if input[0] == b':' {
365        if input[1] != b':' {
366            return Err(ParseError::InvalidIpv6Address);
367        }
368        i = 2;
369        piece_pointer = 1;
370        compress_pointer = Some(1);
371    }
372
373    while i < len {
374        if piece_pointer == 8 {
375            return Err(ParseError::InvalidIpv6Address);
376        }
377        if input[i] == b':' {
378            if compress_pointer.is_some() {
379                return Err(ParseError::InvalidIpv6Address);
380            }
381            i += 1;
382            piece_pointer += 1;
383            compress_pointer = Some(piece_pointer);
384            continue;
385        }
386        let start = i;
387        let end = cmp::min(len, start + 4);
388        let mut value = 0u16;
389        while i < end {
390            match (input[i] as char).to_digit(16) {
391                Some(digit) => {
392                    value = value * 0x10 + digit as u16;
393                    i += 1;
394                }
395                None => break,
396            }
397        }
398        if i < len {
399            match input[i] {
400                b'.' => {
401                    if i == start {
402                        return Err(ParseError::InvalidIpv6Address);
403                    }
404                    i = start;
405                    if piece_pointer > 6 {
406                        return Err(ParseError::InvalidIpv6Address);
407                    }
408                    is_ip_v4 = true;
409                }
410                b':' => {
411                    i += 1;
412                    if i == len {
413                        return Err(ParseError::InvalidIpv6Address);
414                    }
415                }
416                _ => return Err(ParseError::InvalidIpv6Address),
417            }
418        }
419        if is_ip_v4 {
420            break;
421        }
422        pieces[piece_pointer] = value;
423        piece_pointer += 1;
424    }
425
426    if is_ip_v4 {
427        if piece_pointer > 6 {
428            return Err(ParseError::InvalidIpv6Address);
429        }
430        let mut numbers_seen = 0;
431        while i < len {
432            if numbers_seen > 0 {
433                if numbers_seen < 4 && (i < len && input[i] == b'.') {
434                    i += 1
435                } else {
436                    return Err(ParseError::InvalidIpv6Address);
437                }
438            }
439
440            let mut ipv4_piece = None;
441            while i < len {
442                let digit = match input[i] {
443                    c @ b'0'..=b'9' => c - b'0',
444                    _ => break,
445                };
446                match ipv4_piece {
447                    None => ipv4_piece = Some(digit as u16),
448                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
449                    Some(ref mut v) => {
450                        *v = *v * 10 + digit as u16;
451                        if *v > 255 {
452                            return Err(ParseError::InvalidIpv6Address);
453                        }
454                    }
455                }
456                i += 1;
457            }
458
459            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
460                pieces[piece_pointer] * 0x100 + v
461            } else {
462                return Err(ParseError::InvalidIpv6Address);
463            };
464            numbers_seen += 1;
465
466            if numbers_seen == 2 || numbers_seen == 4 {
467                piece_pointer += 1;
468            }
469        }
470
471        if numbers_seen != 4 {
472            return Err(ParseError::InvalidIpv6Address);
473        }
474    }
475
476    if i < len {
477        return Err(ParseError::InvalidIpv6Address);
478    }
479
480    match compress_pointer {
481        Some(compress_pointer) => {
482            let mut swaps = piece_pointer - compress_pointer;
483            piece_pointer = 7;
484            while swaps > 0 {
485                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
486                swaps -= 1;
487                piece_pointer -= 1;
488            }
489        }
490        _ => {
491            if piece_pointer != 8 {
492                return Err(ParseError::InvalidIpv6Address);
493            }
494        }
495    }
496    Ok(Ipv6Addr::new(
497        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
498    ))
499}