Skip to main content

agnt_tools/
ssrf.rs

1//! Atomic SSRF-guarded DNS resolver for the `Fetch` tool.
2//!
3//! ## Why this exists
4//!
5//! v0.2 shipped a two-phase SSRF guard: `ssrf_check` called `ToSocketAddrs`,
6//! validated the returned IPs, and then handed the raw URL to `ureq`. The
7//! problem — first identified in v0.3's adversarial review — is that `ureq`
8//! does its **own** DNS lookup when it actually makes the request. A hostile
9//! authority with a short TTL can return a safe public IP at check time and
10//! flip the record to `169.254.169.254` (or any RFC1918 address) before the
11//! second lookup lands. Classic DNS rebinding. Classic TOCTOU.
12//!
13//! ## The fix
14//!
15//! `ureq::AgentBuilder::resolver` installs a custom [`ureq::Resolver`] that
16//! is the *only* DNS path the agent uses. If we validate inside the resolver
17//! itself and return the validated `SocketAddr`s directly, `ureq` connects
18//! to those exact addresses — no second lookup, no gap. Atomic.
19//!
20//! That is what [`SsrfResolver`] does:
21//!
22//! 1. Parse the netloc (`host:port`).
23//! 2. Reject the metadata hostname blocklist.
24//! 3. If an explicit `allow_hosts` list is configured, reject non-members.
25//! 4. Resolve once.
26//! 5. Reject any returned IP in the loopback / private / link-local /
27//!    broadcast / unspecified / multicast ranges, plus the explicit AWS
28//!    metadata IP (`169.254.169.254`) as belt-and-suspenders on top of
29//!    `is_link_local`.
30//! 6. Return the survivors. `ureq` uses *these* addresses.
31//!
32//! The old `ssrf_check` wrapper still exists in `builtins.rs` for the
33//! upfront scheme / URL-shape check — the resolver can't see the scheme,
34//! only the netloc. Splitting responsibilities keeps each layer minimal.
35//!
36//! ## What this still does not defend against
37//!
38//! - **Dual-stack trickery.** If a host resolves to both an IPv4 and an
39//!   IPv6 address, and one is public and one is private, this rejects the
40//!   whole batch — the old implementation did the same. Correct behavior
41//!   but worth noting.
//! - **IPv6 ULA boundaries.** `std::net::Ipv6Addr::is_unique_local` and
//!   `is_unicast_link_local` are still unstable on the crate's MSRV (1.75),
//!   so we hand-check the `fc00::/7` and `fe80::/10` prefixes. If a future
//!   IPv6 reservation lands outside those, it won't be blocked.
46//! - **Public-but-sensitive internal APIs.** If you run a production service
47//!   on a public IP you own, SSRF-by-IP alone won't save you. Use
48//!   `allow_hosts` as the positive gate for those cases.
49
50use std::io;
51use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
52
/// Host names that are refused outright, before any DNS lookup is issued.
///
/// Every entry is stored in lowercase; callers are expected to lowercase the
/// candidate host before comparing against this list.
const METADATA_HOST_BLOCKLIST: &[&str] = &[
    // GCP metadata server, fully-qualified form.
    "metadata.google.internal",
    // GCP metadata server, short alias.
    "metadata",
    // GCP metadata server, alternate domain.
    "metadata.goog",
    // AWS IMDS address written as a host string. The IP-range validation
    // rejects this address as well; listing it here refuses it without
    // relying on that second layer.
    "169.254.169.254",
];
64
/// A [`ureq::Resolver`] that performs DNS and SSRF validation atomically.
///
/// Install on a `ureq::Agent` via `AgentBuilder::resolver` and ureq will
/// call [`SsrfResolver::resolve`] exactly once per connection attempt,
/// using whatever socket addresses we return. There is no second lookup
/// inside `ureq`, so a DNS rebinding flip between check and use is
/// structurally impossible.
#[derive(Debug, Clone, Default)]
pub struct SsrfResolver {
    /// Optional positive allowlist. When `Some`, every host must match
    /// (case-insensitive) or the resolver rejects with `PermissionDenied`.
    /// Compared *before* DNS so the agent never issues a lookup for a
    /// rejected host.
    pub allow_hosts: Option<Vec<String>>,
}

impl SsrfResolver {
    /// Build a resolver with no allowlist (all hosts pass except the
    /// metadata blocklist and the rejected IP ranges).
    pub fn new() -> Self {
        Self { allow_hosts: None }
    }

    /// Build a resolver with an explicit host allowlist.
    ///
    /// Each entry is lowercased on the way in so it can be compared against
    /// the lowercased host taken from the request.
    pub fn with_allow_hosts(hosts: Vec<String>) -> Self {
        Self {
            allow_hosts: Some(hosts.into_iter().map(|h| h.to_lowercase()).collect()),
        }
    }

    /// Standalone validation for a list of resolved addresses.
    ///
    /// Exposed so the decision logic can be exercised (for example by tests)
    /// without going through a ureq agent.
    ///
    /// The whole batch is rejected as soon as one address is unacceptable.
    /// An address is unacceptable when, after unwrapping an IPv4-mapped IPv6
    /// form (`::ffff:a.b.c.d`), it is:
    ///
    /// - loopback, unspecified, or multicast (either family);
    /// - IPv4 private, link-local, or broadcast;
    /// - IPv4 shared address space `100.64.0.0/10` (RFC 6598), which is not
    ///   globally routable and contains the Alibaba Cloud metadata endpoint
    ///   `100.100.100.200`;
    /// - the AWS metadata IP `169.254.169.254` (explicit, on top of the
    ///   link-local check);
    /// - IPv6 unique-local `fc00::/7` or link-local `fe80::/10`.
    ///
    /// `host` is only used to build error messages.
    ///
    /// # Errors
    ///
    /// - [`io::ErrorKind::NotFound`] when `addrs` is empty.
    /// - [`io::ErrorKind::PermissionDenied`] when any address falls in one of
    ///   the rejected ranges.
    pub fn validate_addrs(host: &str, addrs: &[SocketAddr]) -> io::Result<()> {
        if addrs.is_empty() {
            return Err(io::Error::new(
                io::ErrorKind::NotFound,
                format!("no addresses for {}", host),
            ));
        }

        let deny = |msg: String| -> io::Result<()> {
            Err(io::Error::new(io::ErrorKind::PermissionDenied, msg))
        };

        for sa in addrs {
            // An IPv4-mapped IPv6 address (`::ffff:a.b.c.d`) reaches the
            // embedded IPv4 host when connected to, yet none of the IPv6
            // predicates below recognise it (`Ipv6Addr::is_loopback` only
            // matches `::1`). Unwrap it so `::ffff:127.0.0.1` or
            // `::ffff:169.254.169.254` is judged by the IPv4 rules.
            let ip = match sa.ip() {
                IpAddr::V6(v6) => v6.to_ipv4_mapped().map_or(IpAddr::V6(v6), IpAddr::V4),
                v4 @ IpAddr::V4(_) => v4,
            };

            if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
                return deny(format!("rejected IP {} for {}", ip, host));
            }

            match ip {
                IpAddr::V4(v4) => {
                    let [first, second, ..] = v4.octets();
                    // 100.64.0.0/10: first octet 100, top two bits of the
                    // second octet equal to 0b01.
                    let is_shared = first == 100 && (second & 0xc0) == 0x40;
                    if v4.is_private() || v4.is_link_local() || v4.is_broadcast() || is_shared {
                        return deny(format!("rejected IPv4 {} for {}", v4, host));
                    }
                    // 169.254.169.254 is already caught by is_link_local;
                    // explicit match documents the intent and guards against
                    // std ever dropping link-local from the check.
                    if v4.octets() == [169, 254, 169, 254] {
                        return deny(format!("rejected AWS metadata IP for {}", host));
                    }
                }
                IpAddr::V6(v6) => {
                    let seg0 = v6.segments()[0];
                    // fc00::/7 (ULA) and fe80::/10 (link-local).
                    if (seg0 & 0xfe00) == 0xfc00 || (seg0 & 0xffc0) == 0xfe80 {
                        return deny(format!("rejected IPv6 {} for {}", v6, host));
                    }
                }
            }
        }
        Ok(())
    }
}
147
148impl ureq::Resolver for SsrfResolver {
149    fn resolve(&self, netloc: &str) -> io::Result<Vec<SocketAddr>> {
150        let (raw_host, _) = netloc.rsplit_once(':').ok_or_else(|| {
151            io::Error::new(io::ErrorKind::InvalidInput, format!("bad netloc: {}", netloc))
152        })?;
153        // IPv6 literals arrive bracketed: "[::1]:443". Strip for comparison.
154        let host = raw_host
155            .trim_start_matches('[')
156            .trim_end_matches(']')
157            .to_lowercase();
158
159        if METADATA_HOST_BLOCKLIST.iter().any(|&h| h == host) {
160            return Err(io::Error::new(
161                io::ErrorKind::PermissionDenied,
162                format!("rejected metadata host: {}", host),
163            ));
164        }
165
166        if let Some(allow) = &self.allow_hosts {
167            if !allow.iter().any(|h| h == &host) {
168                return Err(io::Error::new(
169                    io::ErrorKind::PermissionDenied,
170                    format!("host {} not in allowlist", host),
171                ));
172            }
173        }
174
175        let addrs: Vec<SocketAddr> = netloc.to_socket_addrs()?.collect();
176        Self::validate_addrs(&host, &addrs)?;
177        Ok(addrs)
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use ureq::Resolver;

    /// Parse an `ip:port` literal into a `SocketAddr`.
    fn addr(literal: &str) -> SocketAddr {
        literal.parse().unwrap()
    }

    /// Run `validate_addrs` and return the error it is expected to produce.
    fn validation_error(host: &str, addrs: &[SocketAddr]) -> io::Error {
        SsrfResolver::validate_addrs(host, addrs).unwrap_err()
    }

    // --- resolver-level checks (no DNS is needed for any of these) ---

    #[test]
    fn rejects_metadata_host_before_dns() {
        let failure = SsrfResolver::new()
            .resolve("metadata.google.internal:80")
            .unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("metadata"));
    }

    #[test]
    fn rejects_aws_metadata_ip_as_hostname() {
        let failure = SsrfResolver::new()
            .resolve("169.254.169.254:80")
            .unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn rejects_non_allowlist_host_before_dns() {
        let resolver = SsrfResolver::with_allow_hosts(vec!["example.com".into()]);
        let failure = resolver.resolve("not-on-list.invalid:80").unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("allowlist"));
    }

    // --- address-level checks against `validate_addrs` ---

    #[test]
    fn validate_addrs_rejects_loopback() {
        let failure = validation_error("localhost", &[addr("127.0.0.1:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_private_ipv4() {
        let failure = validation_error("internal.corp", &[addr("10.0.0.5:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("IPv4"));
    }

    #[test]
    fn validate_addrs_rejects_link_local_ipv4() {
        let failure = validation_error("anywhere", &[addr("169.254.169.254:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_ipv6_ula() {
        let failure = validation_error("anywhere", &[addr("[fc00::1]:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_ipv6_link_local() {
        let failure = validation_error("anywhere", &[addr("[fe80::1]:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_empty_list() {
        let failure = validation_error("anywhere", &[]);
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn validate_addrs_accepts_public_ipv4() {
        // 93.184.216.34 is the address the original test attributes to example.com.
        let outcome = SsrfResolver::validate_addrs("example.com", &[addr("93.184.216.34:80")]);
        outcome.unwrap();
    }

    #[test]
    fn validate_addrs_rejects_batch_if_any_private() {
        // One public and one private address in the same answer: the whole
        // batch must be refused, regardless of which one would be used.
        let batch = [addr("93.184.216.34:80"), addr("10.0.0.1:80")];
        let failure = validation_error("dual.example", &batch);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn ipv6_literal_netloc_strips_brackets() {
        // `::1` is loopback, so the bracketed literal must be refused with
        // `PermissionDenied` rather than being treated as an odd hostname.
        let failure = SsrfResolver::new().resolve("[::1]:80").unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }
}