// webfetch/guard.rs

//! SSRF guard for the fetch path.
//!
//! `fetch` is reachable from the CLI and the MCP server, so a crafted URL or a
//! prompt-injected link could otherwise be used to reach the cloud metadata
//! endpoint (`169.254.169.254`), `localhost`, or services on the private
//! network. This module rejects non-`http(s)` schemes and any URL whose host
//! resolves to a non-public IP address, on both the initial request and every
//! redirect hop.
//!
//! Set `WEBFETCH_ALLOW_PRIVATE=1` to disable the guard (for trusted internal
//! use or tests).

use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::sync::Once;

use url::{Host, Url};

/// Name of the environment variable that disables the SSRF guard.
///
/// Only the exact values `1`, `true`, and `TRUE` count as "set"; anything else
/// (including `True`, `yes`, or an empty string) leaves the guard enabled.
const ALLOW_PRIVATE_ENV: &str = "WEBFETCH_ALLOW_PRIVATE";

/// Ensures the "guard disabled" warning is printed at most once per process.
static ALLOW_PRIVATE_WARNING: Once = Once::new();

/// Whether the guard is disabled via environment opt-out.
///
/// The variable is re-read on every call. When the opt-out is active, a
/// one-line warning is written to stderr (once per process) so an operator can
/// see the SSRF guard has been turned off and private, loopback, and
/// cloud-metadata addresses are reachable.
///
/// A missing or non-Unicode value is treated as "not set", i.e. the guard
/// stays on.
pub fn allow_private() -> bool {
    // Deliberately strict: an unrecognised spelling must fail closed.
    let enabled = matches!(
        std::env::var(ALLOW_PRIVATE_ENV).as_deref(),
        Ok("1" | "true" | "TRUE")
    );
    if enabled {
        ALLOW_PRIVATE_WARNING.call_once(|| {
            eprintln!(
                "warning: {ALLOW_PRIVATE_ENV} is set — SSRF guard disabled; \
                 private, loopback, and metadata IPs are reachable"
            );
        });
    }
    enabled
}

/// Returns true if `ip` is not safe to fetch from a public-web client.
///
/// Blocked IPv4: loopback, private, link-local (incl. cloud metadata), CGNAT,
/// unspecified / "this network", multicast, broadcast, reserved, and the
/// documentation / benchmarking / IETF-protocol ranges.
///
/// Blocked IPv6: loopback, unspecified, multicast, link-local, site-local,
/// unique-local (ULA), documentation, Teredo, and local-use NAT64. IPv6 forms
/// that merely wrap an IPv4 address (IPv4-mapped, IPv4-compatible, NAT64
/// well-known prefix, 6to4) are classified by the embedded IPv4 address, so
/// they cannot be used to smuggle a private target past the guard.
pub fn is_blocked_ip(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V4(v4) => is_blocked_ipv4(v4),
        IpAddr::V6(v6) => is_blocked_ipv6(v6),
    }
}

/// IPv4 half of [`is_blocked_ip`]: true for every non-public range.
fn is_blocked_ipv4(ip: Ipv4Addr) -> bool {
    let [a, b, c, _] = ip.octets();
    ip.is_loopback() // 127.0.0.0/8
        || ip.is_private() // 10/8, 172.16/12, 192.168/16
        || ip.is_link_local() // 169.254.0.0/16 (cloud metadata)
        || ip.is_broadcast() // 255.255.255.255
        || ip.is_unspecified() // 0.0.0.0
        || ip.is_multicast() // 224.0.0.0/4
        || ip.is_documentation() // 192.0.2/24, 198.51.100/24, 203.0.113/24
        || a == 0 // 0.0.0.0/8 "this network"
        || (a == 100 && (b & 0xc0) == 64) // 100.64.0.0/10 CGNAT
        || (a == 192 && b == 0 && c == 0) // 192.0.0.0/24 IETF protocol
        || (a == 198 && (b & 0xfe) == 18) // 198.18.0.0/15 benchmarking
        || a >= 240 // 240.0.0.0/4 reserved (also covers broadcast)
}

/// IPv6 half of [`is_blocked_ip`].
///
/// Addresses that carry an IPv4 address are unwrapped and judged by
/// [`is_blocked_ipv4`]; everything else is matched against the non-public
/// IPv6 prefixes.
fn is_blocked_ipv6(ip: Ipv6Addr) -> bool {
    // Checked explicitly rather than relying on `to_ipv4()` turning `::1` and
    // `::` into 0.0.0.x addresses.
    if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
        return true;
    }
    // IPv4-mapped (::ffff:a.b.c.d) and IPv4-compatible (::a.b.c.d).
    if let Some(v4) = ip.to_ipv4() {
        return is_blocked_ipv4(v4);
    }
    match ip.segments() {
        // 64:ff9b::/96 NAT64 well-known prefix: IPv4 in the low 32 bits.
        [0x0064, 0xff9b, 0, 0, 0, 0, hi, lo] => is_blocked_ipv4(embedded_ipv4(hi, lo)),
        // 64:ff9b:1::/48 local-use NAT64: never a public destination.
        [0x0064, 0xff9b, 0x0001, ..] => true,
        // 2002::/16 6to4: IPv4 in bits 16..48.
        [0x2002, hi, lo, ..] => is_blocked_ipv4(embedded_ipv4(hi, lo)),
        // 2001::/32 Teredo tunnelling.
        [0x2001, 0x0000, ..] => true,
        // 2001:db8::/32 documentation.
        [0x2001, 0x0db8, ..] => true,
        [first, ..] => {
            (first & 0xffc0) == 0xfe80 // fe80::/10 link-local
                || (first & 0xffc0) == 0xfec0 // fec0::/10 site-local (deprecated)
                || (first & 0xfe00) == 0xfc00 // fc00::/7 unique local (ULA)
        }
    }
}

/// Rebuilds the IPv4 address stored in two consecutive 16-bit IPv6 segments.
fn embedded_ipv4(hi: u16, lo: u16) -> Ipv4Addr {
    let [a, b] = hi.to_be_bytes();
    let [c, d] = lo.to_be_bytes();
    Ipv4Addr::new(a, b, c, d)
}

/// An error describing why a URL was rejected by the guard.
///
/// The wrapped string is the human-readable reason; [`Display`](std::fmt::Display)
/// renders it as `blocked URL: <reason>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockedUrl(pub String);

impl std::fmt::Display for BlockedUrl {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "blocked URL: {}", self.0)
    }
}

// No underlying cause is carried, so the default `source()` (None) is correct.
impl std::error::Error for BlockedUrl {}

101/// Validate a URL's scheme and resolve+classify its host. On success returns
102/// the validated socket addresses (host resolved to public IPs) so the caller
103/// can pin the connection and avoid a DNS-rebinding TOCTOU window.
104///
105/// A no-op (returns `Ok(vec![])`) when the guard is disabled via env.
106///
107/// Async because domain validation resolves DNS via [`tokio::net::lookup_host`]
108/// rather than the blocking `std` resolver — important on the async fetch path
109/// (and the concurrent MCP server) so a slow lookup never blocks a tokio worker.
110pub async fn validate_url(url: &Url) -> Result<Vec<std::net::SocketAddr>, BlockedUrl> {
111    if allow_private() {
112        return Ok(Vec::new());
113    }
114
115    match url.scheme() {
116        "http" | "https" => {}
117        other => return Err(BlockedUrl(format!("scheme `{other}` not allowed"))),
118    }
119
120    let host = url
121        .host()
122        .ok_or_else(|| BlockedUrl(format!("no host in {url}")))?;
123
124    match host {
125        Host::Ipv4(ip) => {
126            if is_blocked_ip(IpAddr::V4(ip)) {
127                return Err(BlockedUrl(format!("host IP {ip} is not public")));
128            }
129            Ok(Vec::new())
130        }
131        Host::Ipv6(ip) => {
132            if is_blocked_ip(IpAddr::V6(ip)) {
133                return Err(BlockedUrl(format!("host IP {ip} is not public")));
134            }
135            Ok(Vec::new())
136        }
137        Host::Domain(domain) => validate_domain(url, domain).await,
138    }
139}
140
141async fn validate_domain(url: &Url, domain: &str) -> Result<Vec<std::net::SocketAddr>, BlockedUrl> {
142    // Block obvious local names early; DNS may also resolve these.
143    let lower = domain.to_ascii_lowercase();
144    if lower == "localhost" || lower.ends_with(".localhost") {
145        return Err(BlockedUrl(format!("host `{domain}` is local")));
146    }
147
148    let port = url
149        .port_or_known_default()
150        .ok_or_else(|| BlockedUrl(format!("no port for {url}")))?;
151
152    // Resolve (non-blocking) and require that EVERY resolved address is public,
153    // then return them so the connection can be pinned to the validated set.
154    let addrs: Vec<_> = tokio::net::lookup_host((domain, port))
155        .await
156        .map_err(|e| BlockedUrl(format!("cannot resolve `{domain}`: {e}")))?
157        .collect();
158
159    if addrs.is_empty() {
160        return Err(BlockedUrl(format!("`{domain}` resolved to no addresses")));
161    }
162    for addr in &addrs {
163        if is_blocked_ip(addr.ip()) {
164            return Err(BlockedUrl(format!(
165                "`{domain}` resolves to non-public IP {}",
166                addr.ip()
167            )));
168        }
169    }
170    Ok(addrs)
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `s` as an IP address and runs it through the classifier.
    fn blocked(s: &str) -> bool {
        is_blocked_ip(s.parse::<IpAddr>().unwrap())
    }

    #[test]
    fn blocks_loopback_and_private_and_metadata() {
        assert!(blocked("127.0.0.1"));
        assert!(blocked("10.0.0.1"));
        assert!(blocked("172.16.5.4"));
        assert!(blocked("192.168.1.1"));
        assert!(blocked("169.254.169.254")); // cloud metadata
        assert!(blocked("100.64.0.1")); // CGNAT
        assert!(blocked("0.0.0.0"));
        assert!(blocked("255.255.255.255"));
        assert!(blocked("224.0.0.1")); // multicast
        assert!(blocked("240.0.0.1")); // reserved
    }

    #[test]
    fn blocks_special_purpose_ipv4_ranges() {
        assert!(blocked("0.1.2.3")); // 0.0.0.0/8 "this network"
        assert!(blocked("100.127.255.255")); // top of CGNAT
        assert!(blocked("192.0.0.1")); // IETF protocol assignments
        assert!(blocked("192.0.2.1")); // documentation
        assert!(blocked("198.18.0.1")); // benchmarking
        assert!(blocked("198.19.255.255")); // top of benchmarking
    }

    #[test]
    fn blocks_ipv6_local_and_mapped() {
        assert!(blocked("::1")); // loopback
        assert!(blocked("::")); // unspecified
        assert!(blocked("fe80::1")); // link-local
        assert!(blocked("fc00::1")); // ULA
        assert!(blocked("ff02::1")); // multicast
        assert!(blocked("2001:db8::1")); // documentation
        assert!(blocked("::ffff:127.0.0.1")); // v4-mapped loopback
        assert!(blocked("::ffff:169.254.169.254")); // v4-mapped metadata
    }

    #[test]
    fn allows_public() {
        assert!(!blocked("1.1.1.1"));
        assert!(!blocked("8.8.8.8"));
        assert!(!blocked("93.184.216.34")); // example.com
        assert!(!blocked("2606:4700:4700::1111")); // cloudflare v6
        // Just outside the blocked ranges.
        assert!(!blocked("100.128.0.1")); // above CGNAT
        assert!(!blocked("172.32.0.1")); // above 172.16/12
        assert!(!blocked("198.20.0.1")); // above benchmarking
    }

    #[tokio::test]
    async fn rejects_non_http_scheme() {
        let url = Url::parse("file:///etc/passwd").unwrap();
        assert!(validate_url(&url).await.is_err());
        let url = Url::parse("ftp://example.com/x").unwrap();
        assert!(validate_url(&url).await.is_err());
    }

    #[tokio::test]
    async fn rejects_literal_metadata_ip_url() {
        let url = Url::parse("http://169.254.169.254/latest/meta-data/").unwrap();
        assert!(validate_url(&url).await.is_err());
    }

    #[tokio::test]
    async fn rejects_literal_ipv6_loopback_url() {
        let url = Url::parse("http://[::1]:8080/").unwrap();
        assert!(validate_url(&url).await.is_err());
    }

    #[tokio::test]
    async fn rejects_localhost_name() {
        let url = Url::parse("http://localhost:8080/admin").unwrap();
        assert!(validate_url(&url).await.is_err());
        let url = Url::parse("http://app.localhost/").unwrap();
        assert!(validate_url(&url).await.is_err());
    }

    // A redirect target is validated by the exact same `validate_url` the fetch
    // loop runs (and pins) on every hop, so a redirect to a private/loopback IP
    // is rejected before any connection is made.
    #[tokio::test]
    async fn rejects_redirect_target_to_private_ip() {
        for target in [
            "http://127.0.0.1/internal",
            "http://10.0.0.1/admin",
            "http://192.168.1.1/",
            "http://169.254.169.254/latest/meta-data/",
        ] {
            let url = Url::parse(target).unwrap();
            assert!(
                validate_url(&url).await.is_err(),
                "redirect target {target} should be blocked"
            );
        }
    }
}