url/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use scionnet::{IpAddr, Ipv4Addr, Ipv6Addr, ScionAddr};
10use std::cmp;
11use std::fmt::{self, Formatter};
12use std::str::FromStr;
13
14use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
15#[cfg(feature = "serde")]
16use serde::{Deserialize, Serialize};
17
18use crate::parser::{ParseError, ParseResult};
19
/// Compact, `Copy` summary of which kind of host a URL has.
///
/// Unlike [`Host`], the `Domain` variant carries no string here; the
/// `From<Host<String>>` impl in this file shows how each `Host` value maps
/// onto these variants.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host (what an empty domain string converts to).
    None,
    /// A non-empty domain; the domain text is not stored in this type.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
    /// A SCION address.
    Scion(ScionAddr),
}
29
30impl From<Host<String>> for HostInternal {
31    fn from(host: Host<String>) -> HostInternal {
32        match host {
33            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
34            Host::Domain(_) => HostInternal::Domain,
35            Host::Ipv4(address) => HostInternal::Ipv4(address),
36            Host::Ipv6(address) => HostInternal::Ipv6(address),
37            Host::Scion(address) => HostInternal::Scion(address),
38        }
39    }
40}
41
/// The host name of a URL.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),

    /// A SCION address.
    /// Its `Display` form is the address's IA and host joined by a `,` comma,
    /// between `[` and `]` brackets (for example `[IA,IP]`).
    Scion(ScionAddr),
}
66
67impl<'a> Host<&'a str> {
68    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
69    pub fn to_owned(&self) -> Host<String> {
70        match *self {
71            Host::Domain(domain) => Host::Domain(domain.to_owned()),
72            Host::Ipv4(address) => Host::Ipv4(address),
73            Host::Ipv6(address) => Host::Ipv6(address),
74            Host::Scion(address) => Host::Scion(address),
75        }
76    }
77}
78
79impl Host<String> {
80    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
81    ///
82    /// <https://url.spec.whatwg.org/#host-parsing>
83    pub fn parse(input: &str) -> Result<Self, ParseError> {
84        if input.starts_with('[') {
85            if !input.ends_with(']') {
86                return Err(ParseError::InvalidIpv6Address);
87            }
88            if input.contains(",") {
89                let token: Vec<&str> = input[1..input.len() - 1].split(",").collect();
90                let host =
91                    IpAddr::from_str(token[1]).map_err(|_e| ParseError::InvalidScionAddress)?;
92                let ia: u64 = token[0]
93                    .parse::<u64>()
94                    .map_err(|_e| ParseError::InvalidScionAddress)?;
95                return Ok(Host::Scion(ScionAddr::new(ia, host)));
96            } else {
97                return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
98            }
99        }
100        let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
101
102        let domain = Self::domain_to_ascii(&domain)?;
103
104        if domain.is_empty() {
105            return Err(ParseError::EmptyHost);
106        }
107
108        let is_invalid_domain_char = |c| {
109            matches!(
110                c,
111                '\0'..='\u{001F}'
112                    | ' '
113                    | '#'
114                    | '%'
115                    | '/'
116                    | ':'
117                    | '<'
118                    | '>'
119                    | '?'
120                    | '@'
121                    | '['
122                    | '\\'
123                    | ']'
124                    | '^'
125                    | '\u{007F}'
126                    | '|'
127            )
128        };
129
130        if domain.find(is_invalid_domain_char).is_some() {
131            Err(ParseError::InvalidDomainCharacter)
132        } else if ends_in_a_number(&domain) {
133            let address = parse_ipv4addr(&domain)?;
134            Ok(Host::Ipv4(address))
135        } else {
136            Ok(Host::Domain(domain))
137        }
138    }
139
140    // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
141    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
142        if input.starts_with('[') {
143            if !input.ends_with(']') {
144                return Err(ParseError::InvalidIpv6Address);
145            }
146            if input.contains(",") {
147                let token: Vec<&str> = input[1..input.len() - 1].split(",").collect();
148                let host =
149                    IpAddr::from_str(token[1]).map_err(|_e| ParseError::InvalidScionAddress)?;
150                let ia: u64 = token[0]
151                    .parse::<u64>()
152                    .map_err(|_e| ParseError::InvalidScionAddress)?;
153                return Ok(Host::Scion(ScionAddr::new(ia, host)));
154            } else {
155                return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
156            }
157        }
158
159        let is_invalid_host_char = |c| {
160            matches!(
161                c,
162                '\0' | '\t'
163                    | '\n'
164                    | '\r'
165                    | ' '
166                    | '#'
167                    | '/'
168                    | ':'
169                    | '<'
170                    | '>'
171                    | '?'
172                    | '@'
173                    | '['
174                    | '\\'
175                    | ']'
176                    | '^'
177                    | '|'
178            )
179        };
180
181        if input.find(is_invalid_host_char).is_some() {
182            Err(ParseError::InvalidDomainCharacter)
183        } else {
184            Ok(Host::Domain(
185                utf8_percent_encode(input, CONTROLS).to_string(),
186            ))
187        }
188    }
189
190    /// convert domain with idna
191    fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
192        idna::domain_to_ascii(domain).map_err(Into::into)
193    }
194}
195
196impl<S: AsRef<str>> fmt::Display for Host<S> {
197    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
198        match *self {
199            Host::Domain(ref domain) => domain.as_ref().fmt(f),
200            Host::Ipv4(ref addr) => addr.fmt(f),
201            Host::Ipv6(ref addr) => {
202                f.write_str("[")?;
203                write_ipv6(addr, f)?;
204                f.write_str("]")
205            }
206            Host::Scion(ref addr) => {
207                f.write_str("[")?;
208                write_scion(addr, f)?;
209                f.write_str("]")
210            }
211        }
212    }
213}
214
215impl<S, T> PartialEq<Host<T>> for Host<S>
216where
217    S: PartialEq<T>,
218{
219    fn eq(&self, other: &Host<T>) -> bool {
220        match (self, other) {
221            (Host::Domain(a), Host::Domain(b)) => a == b,
222            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
223            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
224            (Host::Scion(s1), Host::Scion(s2)) => s1 == s2,
225            (_, _) => false,
226        }
227    }
228}
229
230/*
231 * scion addresses are mangled into URLs as IP literals
232 * [IA,IP]:port
233 */
234fn write_scion(addr: &ScionAddr, f: &mut Formatter<'_>) -> fmt::Result {
235    f.write_str(&addr.get_ia().to_string())?;
236    f.write_str(",")?;
237    f.write_str(&addr.get_host().to_string())
238}
239
240fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
241    let segments = addr.segments();
242    let (compress_start, compress_end) = longest_zero_sequence(&segments);
243    let mut i = 0;
244    while i < 8 {
245        if i == compress_start {
246            f.write_str(":")?;
247            if i == 0 {
248                f.write_str(":")?;
249            }
250            if compress_end < 8 {
251                i = compress_end;
252            } else {
253                break;
254            }
255        }
256        write!(f, "{:x}", segments[i as usize])?;
257        if i < 7 {
258            f.write_str(":")?;
259        }
260        i += 1;
261    }
262    Ok(())
263}
264
/// Finds the run of zero pieces that the IPv6 serializer compresses to `::`.
///
/// Returns `(start, end)` with `end` exclusive. When several runs share the
/// maximum length the first one wins. Runs shorter than two pieces are not
/// compressed; in that case `(-1, -2)` is returned, which matches no index.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-serializer> steps 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;

    let mut idx = 0usize;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        // `idx` is the first zero of a run; advance to one past its end.
        let run_start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        let run_len = (idx - run_start) as isize;
        // Strictly greater: an equally long later run does not replace an earlier one.
        if run_len > best_len {
            best_start = run_start as isize;
            best_len = run_len;
        }
    }

    // Step 3 of the serializer: ignore lone zeroes.
    if best_len < 2 {
        (-1, -2)
    } else {
        (best_start, best_start + best_len)
    }
}
300
301/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
302fn ends_in_a_number(input: &str) -> bool {
303    let mut parts = input.rsplit('.');
304    let last = parts.next().unwrap();
305    let last = if last.is_empty() {
306        if let Some(last) = parts.next() {
307            last
308        } else {
309            return false;
310        }
311    } else {
312        last
313    };
314    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
315        return true;
316    }
317
318    parse_ipv4number(last).is_ok()
319}
320
/// Parses one dot-separated part of an IPv4 address.
///
/// A `0x`/`0X` prefix selects hexadecimal, any other leading `0` followed by
/// at least one more character selects octal, and everything else is decimal.
/// A prefix with nothing after it evaluates to zero.
///
/// Returns `Err(())` if the input is empty or contains a character that is not
/// a digit of the selected radix, and `Ok(None)` if the input is a valid
/// number that overflows a `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let (digits, radix): (&str, u32) = if let Some(rest) = input.strip_prefix("0x") {
        (rest, 16)
    } else if let Some(rest) = input.strip_prefix("0X") {
        (rest, 16)
    } else if input.len() >= 2 && input.starts_with('0') {
        (&input[1..], 8)
    } else {
        (input, 10)
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }

    let every_digit_valid = digits.bytes().all(|byte| match radix {
        8 => (b'0'..=b'7').contains(&byte),
        10 => byte.is_ascii_digit(),
        _ => byte.is_ascii_hexdigit(),
    });
    if !every_digit_valid {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion can
    // still fail is integer overflow; that case is reported as `None`.
    Ok(u32::from_str_radix(digits, radix).ok())
}
357
358/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
359fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
360    let mut parts: Vec<&str> = input.split('.').collect();
361    if parts.last() == Some(&"") {
362        parts.pop();
363    }
364    if parts.len() > 4 {
365        return Err(ParseError::InvalidIpv4Address);
366    }
367    let mut numbers: Vec<u32> = Vec::new();
368    for part in parts {
369        match parse_ipv4number(part) {
370            Ok(Some(n)) => numbers.push(n),
371            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
372            Err(()) => return Err(ParseError::InvalidIpv4Address),
373        };
374    }
375    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
376    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
377    if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
378        return Err(ParseError::InvalidIpv4Address);
379    }
380    if numbers.iter().any(|x| *x > 255) {
381        return Err(ParseError::InvalidIpv4Address);
382    }
383    for (counter, n) in numbers.iter().enumerate() {
384        ipv4 += n << (8 * (3 - counter as u32))
385    }
386    Ok(Ipv4Addr::from(ipv4))
387}
388
/// Parses the text of an IPv6 address into an `Ipv6Addr`.
///
/// The input is scanned byte by byte into eight 16-bit pieces. It supports a
/// single `::` compression and a trailing dotted-decimal IPv4 part that fills
/// two pieces. Any violation yields `ParseError::InvalidIpv6Address`.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    let mut is_ip_v4 = false;
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the piece currently being filled.
    let mut piece_pointer = 0;
    // Piece index at which a `::` compression was seen, if any.
    let mut compress_pointer = None;
    // Read position in `input`.
    let mut i = 0;

    // Inputs shorter than two bytes are rejected up front; this also makes the
    // `input[1]` access below safe.
    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as the first half of a leading "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    // Each iteration consumes either a compression marker or one hex piece.
    while i < len {
        // More input remains although all eight pieces are already filled.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' at the start of a piece is the second colon of a "::";
        // only one compression is allowed per address.
        if input[i] == b':' {
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        if i < len {
            match input[i] {
                // A '.' means the digits just read belong to an embedded IPv4
                // address: rewind to the start of this piece so it can be
                // re-read as decimal below.
                b'.' => {
                    // The '.' must be preceded by at least one digit.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    i = start;
                    // The IPv4 part needs two free pieces.
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                // A piece separator; it must not be the last byte of the input.
                b':' => {
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        if is_ip_v4 {
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    // Trailing IPv4 part: four decimal numbers separated by '.', packed two
    // per 16-bit piece.
    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by a '.', and
            // there may be no input left after the fourth number.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Accumulate one decimal number; `None` means no digit was read.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        // Each number must fit in one byte.
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Shift the byte already stored in this piece up and append the
            // new one; a missing number is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            // Two numbers complete one 16-bit piece.
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    // Anything left unconsumed at this point is an error.
    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        // Move the pieces parsed after the "::" to the end of the array,
        // which leaves the zeroes of the compressed run in the gap.
        Some(compress_pointer) => {
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        // Without compression all eight pieces must have been given.
        _ => {
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}