agnt_tools/ssrf.rs
1//! Atomic SSRF-guarded DNS resolver for the `Fetch` tool.
2//!
3//! ## Why this exists
4//!
5//! v0.2 shipped a two-phase SSRF guard: `ssrf_check` called `ToSocketAddrs`,
6//! validated the returned IPs, and then handed the raw URL to `ureq`. The
7//! problem — first identified in v0.3's adversarial review — is that `ureq`
8//! does its **own** DNS lookup when it actually makes the request. A hostile
9//! authority with a short TTL can return a safe public IP at check time and
10//! flip the record to `169.254.169.254` (or any RFC1918 address) before the
11//! second lookup lands. Classic DNS rebinding. Classic TOCTOU.
12//!
13//! ## The fix
14//!
15//! `ureq::AgentBuilder::resolver` installs a custom [`ureq::Resolver`] that
16//! is the *only* DNS path the agent uses. If we validate inside the resolver
17//! itself and return the validated `SocketAddr`s directly, `ureq` connects
18//! to those exact addresses — no second lookup, no gap. Atomic.
19//!
20//! That is what [`SsrfResolver`] does:
21//!
22//! 1. Parse the netloc (`host:port`).
23//! 2. Reject the metadata hostname blocklist.
24//! 3. If an explicit `allow_hosts` list is configured, reject non-members.
25//! 4. Resolve once.
26//! 5. Reject any returned IP in the loopback / private / link-local /
27//! broadcast / unspecified / multicast ranges, plus the explicit AWS
28//! metadata IP (`169.254.169.254`) as belt-and-suspenders on top of
29//! `is_link_local`.
30//! 6. Return the survivors. `ureq` uses *these* addresses.
31//!
32//! The old `ssrf_check` wrapper still exists in `builtins.rs` for the
33//! upfront scheme / URL-shape check — the resolver can't see the scheme,
34//! only the netloc. Splitting responsibilities keeps each layer minimal.
35//!
36//! ## What this still does not defend against
37//!
38//! - **Dual-stack trickery.** If a host resolves to both an IPv4 and an
39//! IPv6 address, and one is public and one is private, this rejects the
40//! whole batch — the old implementation did the same. Correct behavior
41//! but worth noting.
//! - **IPv6 ULA boundaries.** `std::net::Ipv6Addr::is_unique_local` and
//!   `is_unicast_link_local` are still unstable on the crate's MSRV (1.75),
//!   so we hand-check the `fc00::/7` and `fe80::/10` prefixes. If a future
//!   IPv6 reservation lands outside those, it won't be blocked.
46//! - **Public-but-sensitive internal APIs.** If you run a production service
47//! on a public IP you own, SSRF-by-IP alone won't save you. Use
48//! `allow_hosts` as the positive gate for those cases.
49
50use std::io;
51use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
52
/// Host names that are refused outright, no matter what DNS would answer.
///
/// Entries are stored lowercase because callers compare them against an
/// already-lowercased host. The list covers the GCP metadata alias, which
/// resolves to a public-looking IP from outside a VM but exposes
/// credentials from inside one.
const METADATA_HOST_BLOCKLIST: &[&str] = &[
    "metadata.google.internal",
    "metadata",
    "metadata.goog",
    // The AWS IMDS address in textual form: belt-and-suspenders in case a
    // clever URL parser hands the IP over as if it were a host name.
    "169.254.169.254",
];
64
/// A [`ureq::Resolver`] that performs DNS and SSRF validation atomically.
///
/// Install on a `ureq::Agent` via `AgentBuilder::resolver` and ureq will
/// call [`SsrfResolver::resolve`] exactly once per connection attempt,
/// using whatever socket addresses we return. There is no second lookup
/// inside `ureq`, so a DNS rebinding flip between check and use is
/// structurally impossible.
#[derive(Debug, Clone, Default)]
pub struct SsrfResolver {
    /// Optional positive allowlist. When `Some`, every host must match
    /// (case-insensitive) or the resolver rejects with `PermissionDenied`.
    /// Compared *before* DNS so the agent never issues a lookup for a
    /// rejected host.
    pub allow_hosts: Option<Vec<String>>,
}

impl SsrfResolver {
    /// Build a resolver with no allowlist (all hosts pass except the
    /// metadata blocklist and private IP ranges).
    pub fn new() -> Self {
        Self { allow_hosts: None }
    }

    /// Build a resolver with an explicit host allowlist.
    ///
    /// Entries are lowercased once here so later comparisons against a
    /// lowercased host are case-insensitive.
    pub fn with_allow_hosts(hosts: Vec<String>) -> Self {
        Self {
            allow_hosts: Some(hosts.into_iter().map(|h| h.to_lowercase()).collect()),
        }
    }

    /// Standalone validation for a list of resolved addresses.
    ///
    /// Exposed so tests can exercise the decision logic without going
    /// through a ureq agent. `host` is only used to build error messages.
    ///
    /// The whole batch is rejected as soon as one address is forbidden;
    /// a host that resolves to a mix of public and internal addresses is
    /// never partially accepted.
    ///
    /// IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) are unwrapped and
    /// judged by the IPv4 rules. Without that step `::ffff:127.0.0.1` or
    /// `::ffff:169.254.169.254` would pass, because the `Ipv6Addr`
    /// predicates and the ULA / link-local prefix test only look at the
    /// IPv6 form of the address.
    ///
    /// # Errors
    ///
    /// * `NotFound` when `addrs` is empty.
    /// * `PermissionDenied` when any address is loopback, unspecified,
    ///   multicast, RFC1918 private, IPv4 link-local (including
    ///   `169.254.169.254`), IPv4 broadcast, IPv6 ULA (`fc00::/7`), IPv6
    ///   link-local (`fe80::/10`), or an IPv4-mapped IPv6 address whose
    ///   embedded IPv4 address falls in one of the IPv4 categories above.
    pub fn validate_addrs(host: &str, addrs: &[SocketAddr]) -> io::Result<()> {
        if addrs.is_empty() {
            return Err(io::Error::new(
                io::ErrorKind::NotFound,
                format!("no addresses for {}", host),
            ));
        }
        for sa in addrs {
            let ip = sa.ip();
            if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
                return Err(Self::denied(format!("rejected IP {} for {}", ip, host)));
            }
            match ip {
                IpAddr::V4(v4) => {
                    if v4.is_private() || v4.is_link_local() || v4.is_broadcast() {
                        return Err(Self::denied(format!(
                            "rejected IPv4 {} for {}",
                            v4, host
                        )));
                    }
                    // 169.254.169.254 is already caught by is_link_local;
                    // the explicit match documents the intent and guards
                    // against std ever dropping link-local from the check.
                    if v4.octets() == [169, 254, 169, 254] {
                        return Err(Self::denied(format!(
                            "rejected AWS metadata IP for {}",
                            host
                        )));
                    }
                }
                IpAddr::V6(v6) => {
                    // An IPv4-mapped address is an IPv4 destination in
                    // IPv6 clothing; judge it by the embedded address.
                    if let Some(embedded) = v6.to_ipv4_mapped() {
                        if Self::is_forbidden_v4(embedded) {
                            return Err(Self::denied(format!(
                                "rejected IPv4-mapped IPv6 {} for {}",
                                v6, host
                            )));
                        }
                    }
                    let seg0 = v6.segments()[0];
                    // fc00::/7 (ULA) and fe80::/10 (link-local).
                    if (seg0 & 0xfe00) == 0xfc00 || (seg0 & 0xffc0) == 0xfe80 {
                        return Err(Self::denied(format!(
                            "rejected IPv6 {} for {}",
                            v6, host
                        )));
                    }
                }
            }
        }
        Ok(())
    }

    /// True when `v4` falls in any IPv4 range this guard refuses:
    /// loopback, unspecified, multicast, private, link-local or broadcast,
    /// plus the explicit metadata address as belt-and-suspenders.
    fn is_forbidden_v4(v4: std::net::Ipv4Addr) -> bool {
        v4.is_loopback()
            || v4.is_unspecified()
            || v4.is_multicast()
            || v4.is_private()
            || v4.is_link_local()
            || v4.is_broadcast()
            || v4.octets() == [169, 254, 169, 254]
    }

    /// Build the `PermissionDenied` error used for every rejected address.
    fn denied(message: String) -> io::Error {
        io::Error::new(io::ErrorKind::PermissionDenied, message)
    }
}
147
148impl ureq::Resolver for SsrfResolver {
149 fn resolve(&self, netloc: &str) -> io::Result<Vec<SocketAddr>> {
150 let (raw_host, _) = netloc.rsplit_once(':').ok_or_else(|| {
151 io::Error::new(io::ErrorKind::InvalidInput, format!("bad netloc: {}", netloc))
152 })?;
153 // IPv6 literals arrive bracketed: "[::1]:443". Strip for comparison.
154 let host = raw_host
155 .trim_start_matches('[')
156 .trim_end_matches(']')
157 .to_lowercase();
158
159 if METADATA_HOST_BLOCKLIST.iter().any(|&h| h == host) {
160 return Err(io::Error::new(
161 io::ErrorKind::PermissionDenied,
162 format!("rejected metadata host: {}", host),
163 ));
164 }
165
166 if let Some(allow) = &self.allow_hosts {
167 if !allow.iter().any(|h| h == &host) {
168 return Err(io::Error::new(
169 io::ErrorKind::PermissionDenied,
170 format!("host {} not in allowlist", host),
171 ));
172 }
173 }
174
175 let addrs: Vec<SocketAddr> = netloc.to_socket_addrs()?.collect();
176 Self::validate_addrs(&host, &addrs)?;
177 Ok(addrs)
178 }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use ureq::Resolver;

    /// Parse an `ip:port` literal, panicking on a malformed fixture.
    fn sock(literal: &str) -> SocketAddr {
        literal.parse().unwrap()
    }

    /// Run `validate_addrs` on input that is expected to be refused and
    /// hand back the resulting error.
    fn validation_error(host: &str, addrs: &[SocketAddr]) -> io::Error {
        SsrfResolver::validate_addrs(host, addrs).unwrap_err()
    }

    #[test]
    fn rejects_metadata_host_before_dns() {
        let failure = SsrfResolver::new()
            .resolve("metadata.google.internal:80")
            .unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("metadata"));
    }

    #[test]
    fn rejects_aws_metadata_ip_as_hostname() {
        let failure = SsrfResolver::new()
            .resolve("169.254.169.254:80")
            .unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn rejects_non_allowlist_host_before_dns() {
        let resolver = SsrfResolver::with_allow_hosts(vec!["example.com".into()]);
        let failure = resolver.resolve("not-on-list.invalid:80").unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("allowlist"));
    }

    #[test]
    fn validate_addrs_rejects_loopback() {
        let failure = validation_error("localhost", &[sock("127.0.0.1:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_private_ipv4() {
        let failure = validation_error("internal.corp", &[sock("10.0.0.5:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
        assert!(failure.to_string().contains("IPv4"));
    }

    #[test]
    fn validate_addrs_rejects_link_local_ipv4() {
        let failure = validation_error("anywhere", &[sock("169.254.169.254:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_ipv6_ula() {
        let failure = validation_error("anywhere", &[sock("[fc00::1]:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_ipv6_link_local() {
        let failure = validation_error("anywhere", &[sock("[fe80::1]:80")]);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn validate_addrs_rejects_empty_list() {
        let failure = validation_error("anywhere", &[]);
        assert_eq!(failure.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn validate_addrs_accepts_public_ipv4() {
        // 93.184.216.34 is the address the original fixture labels as
        // example.com.
        let public = sock("93.184.216.34:80");
        SsrfResolver::validate_addrs("example.com", &[public]).unwrap();
    }

    #[test]
    fn validate_addrs_rejects_batch_if_any_private() {
        // One public and one private address in the same answer: the
        // whole batch must be refused rather than depending on which
        // address the connection happens to pick.
        let batch = [sock("93.184.216.34:80"), sock("10.0.0.1:80")];
        let failure = validation_error("dual.example", &batch);
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn ipv6_literal_netloc_strips_brackets() {
        // ::1 is loopback, so the IP check should refuse it; the brackets
        // must not cause it to be misparsed as an odd host name.
        let failure = SsrfResolver::new().resolve("[::1]:80").unwrap_err();
        assert_eq!(failure.kind(), io::ErrorKind::PermissionDenied);
    }
}