1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//! IDN (Internationalized Domain Name) host normalisation.
//!
//! Converts a Unicode hostname to its ASCII/punycode form per UTS-46 (e.g.
//! `münchen.de` → `xn--mnchen-3ya.de`) so it can be used for DNS resolution,
//! the HTTP `Host:` header, and TLS SNI — all of which expect ASCII.
//!
//! Gated by the `idn` Cargo feature (on by default), implemented with the
//! first-party pure-Rust `intl` crate's `idna` module. Without the feature the
//! `intl` dependency and its Unicode tables are dropped and [`to_ascii`] is a
//! pure passthrough, so a Unicode host flows through unchanged (and typically
//! fails to resolve) — the correct "no IDN compiled" behaviour.
#[cfg(feature = "idn")]
use crate::error::Error;
use crate::error::Result;
/// Return the ASCII/punycode form of `host` when `enabled` and the host is
/// non-ASCII; otherwise return `host` unchanged.
///
/// Idempotent: an already-ASCII host (IPv4 literal, bracketed IPv6, `localhost`,
/// an already-punycode name, or any plain ASCII name) is never handed to the
/// IDNA encoder and comes back byte-identical. The disabled path and the
/// feature-off build are likewise plain passthroughs.
///
/// # Errors
///
/// Only the `idn`-feature path with `enabled == true` and a non-ASCII `host`
/// can fail. It returns `Error::InvalidUrl` when the IDNA encoder rejects the
/// host, or when the encoded output contains a byte that is forbidden in a
/// host (control characters, space, or an authority/URL delimiter).
pub(crate) fn to_ascii(host: &str, enabled: bool) -> Result<String> {
    #[cfg(feature = "idn")]
    if enabled && !host.is_ascii() {
        let ascii = intl::unicode::idna::to_ascii(host)
            .map_err(|_| Error::InvalidUrl(format!("invalid IDN host: {host}")))?;
        // UTS-46 `to_ascii` still maps fullwidth/compatibility code points onto
        // ASCII authority delimiters (e.g. U+FF20 FULLWIDTH COMMERCIAL AT → `@`,
        // U+FF0F FULLWIDTH SOLIDUS → `/`, U+FF1A FULLWIDTH COLON → `:`) without
        // rejecting them at the source. That output is written straight back
        // into `Url::host` AFTER parse-time validation, so without re-checking
        // it an attacker can smuggle a delimiter past the parser and trigger
        // origin/host confusion (DNS, SNI, `Host:` header, proxy request line,
        // pool key). A legitimate punycode/ASCII hostname is only letters,
        // digits, hyphens, and dots, so reject any encoded output that carries
        // a forbidden byte. This branch never sees a bracketed IPv6 literal
        // (those are ASCII and skip the encoder), so rejecting `:`, `[` and
        // `]` here is safe.
        if ascii.bytes().any(is_forbidden_host_byte) {
            return Err(Error::InvalidUrl(format!(
                "IDN host encodes to a forbidden authority delimiter: {host}"
            )));
        }
        return Ok(ascii);
    }
    // `enabled` is only read inside the feature-gated branch above; consume it
    // here so the feature-off build does not see an unused parameter.
    let _ = enabled;
    Ok(host.to_string())
}

/// Whether `b` must never appear in an IDNA-encoded host.
///
/// The set is the WHATWG URL Standard's "forbidden domain code points": C0
/// controls, space, DEL, `%`, and the delimiters `#` `/` `:` `<` `>` `?` `@`
/// `[` `\` `]` `^` `|`. Brackets matter in particular because a host wrapped
/// in `[`…`]` reads as an IP-literal to downstream authority parsing; they are
/// presumably reachable through the same kind of compatibility mapping as the
/// other delimiters (e.g. U+FF3B FULLWIDTH LEFT SQUARE BRACKET → `[`).
#[cfg(feature = "idn")]
fn is_forbidden_host_byte(b: u8) -> bool {
    matches!(
        b,
        0x00..=0x20
            | 0x7f
            | b'#'
            | b'%'
            | b'/'
            | b':'
            | b'<'
            | b'>'
            | b'?'
            | b'@'
            | b'['
            | b'\\'
            | b']'
            | b'^'
            | b'|'
    )
}
#[cfg(test)]
mod tests {
    use super::to_ascii;

    /// Every ASCII host must come back byte-identical, even with IDN enabled.
    #[test]
    fn ascii_hosts_pass_through_unchanged() {
        for h in [
            "example.com",
            "127.0.0.1",
            "[::1]",
            "xn--mnchen-3ya.de", // already punycode
            "localhost",
            "Example.COM", // case preserved: ASCII never touches idna
        ] {
            assert_eq!(
                to_ascii(h, true).unwrap(),
                h,
                "ASCII host must be untouched: {h}"
            );
        }
    }

    /// A Unicode host is converted to its punycode form when IDN is enabled.
    #[cfg(feature = "idn")]
    #[test]
    fn unicode_host_is_punycoded_when_enabled() {
        assert_eq!(to_ascii("münchen.de", true).unwrap(), "xn--mnchen-3ya.de");
        assert_eq!(to_ascii("☃.net", true).unwrap(), "xn--n3h.net");
    }

    /// With `enabled == false` a Unicode host is returned as-is.
    #[test]
    fn disabled_leaves_unicode_raw() {
        assert_eq!(to_ascii("münchen.de", false).unwrap(), "münchen.de");
    }

    /// UTS-46 with UseSTD3ASCIIRules=false maps fullwidth/compatibility code
    /// points onto ASCII authority delimiters. Each of these must be rejected
    /// after IDN encoding so the delimiter cannot be smuggled past parse-time
    /// host validation (origin/host-confusion hardening).
    ///
    /// The fullwidth code points are written as `\u{…}` escapes on purpose: a
    /// literal that gets normalised to its ASCII look-alike would take the
    /// ASCII passthrough path and never reach the guard under test.
    #[cfg(feature = "idn")]
    #[test]
    fn rejects_idn_authority_delimiter_injection() {
        for input in [
            "\u{FF20}evil.com",            // U+FF20 -> '@'
            "good.com\u{FF0F}../evil.com", // U+FF0F -> '/'
            "good.com\u{FF1A}8080",        // U+FF1A -> ':'
            "evil\u{FF03}.com",            // U+FF03 -> '#'
            "x\u{FF1F}y.com",              // U+FF1F -> '?'
        ] {
            // Guard the fixture itself: an ASCII input would be passed through
            // untouched and make the assertion below meaningless.
            assert!(
                !input.is_ascii(),
                "fixture must contain a non-ASCII code point: {input:?}"
            );
            assert!(
                to_ascii(input, true).is_err(),
                "IDN delimiter injection must be rejected: {input:?}"
            );
        }
    }

    /// The guard must not break legitimate internationalised or ASCII hosts.
    #[cfg(feature = "idn")]
    #[test]
    fn legitimate_hosts_still_succeed_after_guard() {
        assert_eq!(to_ascii("münchen.de", true).unwrap(), "xn--mnchen-3ya.de");
        // Already-ASCII host is untouched.
        assert_eq!(to_ascii("example.com", true).unwrap(), "example.com");
    }
}