Skip to main content

webfetch/
guard.rs

1//! SSRF guard for the fetch path.
2//!
3//! `fetch` is reachable from the CLI and the MCP server, so a crafted URL or a
4//! prompt-injected link could otherwise be used to reach the cloud metadata
5//! endpoint (`169.254.169.254`), `localhost`, or services on the private
6//! network. This module rejects non-`http(s)` schemes and any URL whose host
7//! resolves to a non-public IP address, on both the initial request and every
8//! redirect hop.
9//!
10//! Set `WEBFETCH_ALLOW_PRIVATE=1` to disable the guard (for trusted internal
11//! use or tests).
12
13use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, ToSocketAddrs};
14
15use url::{Host, Url};
16
/// Name of the environment variable that switches the SSRF guard off.
///
/// The guard is disabled only when the value is exactly `1`, `true`, or
/// `TRUE`; see [`allow_private`].
const ALLOW_PRIVATE_ENV: &str = "WEBFETCH_ALLOW_PRIVATE";

/// Reports whether the SSRF guard has been disabled through the environment.
///
/// Returns `true` only when `WEBFETCH_ALLOW_PRIVATE` is set to exactly `1`,
/// `true`, or `TRUE`. An unset variable, a value that is not valid Unicode,
/// or any other value leaves the guard enabled.
pub fn allow_private() -> bool {
    match std::env::var(ALLOW_PRIVATE_ENV) {
        Ok(value) => ["1", "true", "TRUE"].contains(&value.as_str()),
        // Unset or not valid Unicode: keep the guard on.
        Err(_) => false,
    }
}
27
28/// Returns true if `ip` is not safe to fetch from a public-web client:
29/// loopback, private, link-local (incl. cloud metadata), CGNAT, unspecified,
30/// multicast, broadcast, documentation/benchmark ranges, and the IPv6
31/// equivalents (ULA, link-local, IPv4-mapped).
32pub fn is_blocked_ip(ip: IpAddr) -> bool {
33    match ip {
34        IpAddr::V4(v4) => is_blocked_ipv4(v4),
35        IpAddr::V6(v6) => is_blocked_ipv6(v6),
36    }
37}
38
/// Returns `true` if the IPv4 address `ip` lies in a range that is not part
/// of the public internet.
///
/// The well-known ranges are delegated to the standard library predicates;
/// the remaining special-purpose blocks are matched on the raw octets.
fn is_blocked_ipv4(ip: Ipv4Addr) -> bool {
    let std_classified = ip.is_loopback() // 127.0.0.0/8
        || ip.is_private() // 10/8, 172.16/12, 192.168/16
        || ip.is_link_local() // 169.254.0.0/16 (cloud metadata)
        || ip.is_multicast() // 224.0.0.0/4
        || ip.is_documentation(); // 192.0.2/24, 198.51.100/24, 203.0.113/24
    if std_classified {
        return true;
    }

    matches!(
        ip.octets(),
        // 0.0.0.0/8 "this network" (covers the unspecified address 0.0.0.0)
        [0, ..]
        // 100.64.0.0/10 CGNAT shared address space
        | [100, 64..=127, ..]
        // 192.0.0.0/24 IETF protocol assignments
        | [192, 0, 0, _]
        // 198.18.0.0/15 benchmarking
        | [198, 18..=19, ..]
        // 240.0.0.0/4 reserved (covers the broadcast address 255.255.255.255)
        | [240..=255, ..]
    )
}
54
55fn is_blocked_ipv6(ip: Ipv6Addr) -> bool {
56    // IPv4-mapped / -compatible: classify by the embedded IPv4 address.
57    if let Some(v4) = ip.to_ipv4_mapped() {
58        return is_blocked_ipv4(v4);
59    }
60    if let Some(v4) = ip.to_ipv4() {
61        // Covers ::a.b.c.d (incl. ::1 loopback and :: unspecified).
62        return is_blocked_ipv4(v4);
63    }
64    let seg = ip.segments();
65    ip.is_loopback()
66        || ip.is_unspecified()
67        || ip.is_multicast()
68        || (seg[0] & 0xffc0) == 0xfe80 // fe80::/10 link-local
69        || (seg[0] & 0xfe00) == 0xfc00 // fc00::/7 unique local (ULA)
70        || (seg[0] == 0x2001 && seg[1] == 0x0db8) // 2001:db8::/32 documentation
71}
72
/// Error returned when the guard refuses a URL.
///
/// The wrapped string is the human-readable reason for the rejection; the
/// `Display` output prefixes it with `blocked URL: `.
#[derive(Debug)]
pub struct BlockedUrl(pub String);

impl std::fmt::Display for BlockedUrl {
    /// Writes `blocked URL: ` followed by the rejection reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(reason) = self;
        write!(f, "blocked URL: {reason}")
    }
}

// No source error is carried, so the default trait methods are sufficient.
impl std::error::Error for BlockedUrl {}
84
85/// Validate a URL's scheme and resolve+classify its host. On success returns
86/// the validated socket addresses (host resolved to public IPs) so the caller
87/// can pin the connection and avoid a DNS-rebinding TOCTOU window.
88///
89/// A no-op (returns `Ok(vec![])`) when the guard is disabled via env.
90pub fn validate_url(url: &Url) -> Result<Vec<std::net::SocketAddr>, BlockedUrl> {
91    if allow_private() {
92        return Ok(Vec::new());
93    }
94
95    match url.scheme() {
96        "http" | "https" => {}
97        other => return Err(BlockedUrl(format!("scheme `{other}` not allowed"))),
98    }
99
100    let host = url
101        .host()
102        .ok_or_else(|| BlockedUrl(format!("no host in {url}")))?;
103
104    match host {
105        Host::Ipv4(ip) => {
106            if is_blocked_ip(IpAddr::V4(ip)) {
107                return Err(BlockedUrl(format!("host IP {ip} is not public")));
108            }
109            Ok(Vec::new())
110        }
111        Host::Ipv6(ip) => {
112            if is_blocked_ip(IpAddr::V6(ip)) {
113                return Err(BlockedUrl(format!("host IP {ip} is not public")));
114            }
115            Ok(Vec::new())
116        }
117        Host::Domain(domain) => validate_domain(url, domain),
118    }
119}
120
121fn validate_domain(url: &Url, domain: &str) -> Result<Vec<std::net::SocketAddr>, BlockedUrl> {
122    // Block obvious local names early; DNS may also resolve these.
123    let lower = domain.to_ascii_lowercase();
124    if lower == "localhost" || lower.ends_with(".localhost") {
125        return Err(BlockedUrl(format!("host `{domain}` is local")));
126    }
127
128    let port = url
129        .port_or_known_default()
130        .ok_or_else(|| BlockedUrl(format!("no port for {url}")))?;
131
132    // Resolve and require that EVERY resolved address is public, then return
133    // them so the connection can be pinned to the validated set.
134    let addrs: Vec<_> = (domain, port)
135        .to_socket_addrs()
136        .map_err(|e| BlockedUrl(format!("cannot resolve `{domain}`: {e}")))?
137        .collect();
138
139    if addrs.is_empty() {
140        return Err(BlockedUrl(format!("`{domain}` resolved to no addresses")));
141    }
142    for addr in &addrs {
143        if is_blocked_ip(addr.ip()) {
144            return Err(BlockedUrl(format!(
145                "`{domain}` resolves to non-public IP {}",
146                addr.ip()
147            )));
148        }
149    }
150    Ok(addrs)
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses every literal as an IP address and asserts that
    /// `is_blocked_ip` returns `expected` for it.
    fn assert_verdict(expected: bool, literals: &[&str]) {
        for literal in literals {
            let ip: IpAddr = literal.parse().unwrap();
            assert_eq!(
                is_blocked_ip(ip),
                expected,
                "unexpected verdict for {}",
                literal
            );
        }
    }

    /// Parses `raw` as a URL and asserts that the guard rejects it.
    fn assert_rejected(raw: &str) {
        let url = Url::parse(raw).unwrap();
        assert!(validate_url(&url).is_err(), "{} should be rejected", raw);
    }

    #[test]
    fn blocks_loopback_and_private_and_metadata() {
        assert_verdict(
            true,
            &[
                "127.0.0.1",
                "10.0.0.1",
                "172.16.5.4",
                "192.168.1.1",
                "169.254.169.254", // cloud metadata
                "100.64.0.1",      // CGNAT
                "0.0.0.0",
                "255.255.255.255",
                "224.0.0.1", // multicast
                "240.0.0.1", // reserved
            ],
        );
    }

    #[test]
    fn blocks_ipv6_local_and_mapped() {
        assert_verdict(
            true,
            &[
                "::1",                    // loopback
                "::",                     // unspecified
                "fe80::1",                // link-local
                "fc00::1",                // ULA
                "::ffff:127.0.0.1",       // v4-mapped loopback
                "::ffff:169.254.169.254", // v4-mapped metadata
            ],
        );
    }

    #[test]
    fn allows_public() {
        assert_verdict(
            false,
            &[
                "1.1.1.1",
                "8.8.8.8",
                "93.184.216.34",        // example.com
                "2606:4700:4700::1111", // cloudflare v6
            ],
        );
    }

    #[test]
    fn rejects_non_http_scheme() {
        assert_rejected("file:///etc/passwd");
        assert_rejected("ftp://example.com/x");
    }

    #[test]
    fn rejects_literal_metadata_ip_url() {
        assert_rejected("http://169.254.169.254/latest/meta-data/");
    }

    #[test]
    fn rejects_localhost_name() {
        assert_rejected("http://localhost:8080/admin");
    }
}