reposix_core/http.rs
1//! The one legal HTTP client factory + per-request allowlist gate.
2//!
3//! Security contract (see `.planning/PROJECT.md` SG-01 and SG-07):
4//!
5//! - Every outbound HTTP call in this workspace MUST go through
6//! [`client`] to build an [`HttpClient`] and then call one of
7//! [`HttpClient::request`] / [`HttpClient::get`] / [`HttpClient::post`] /
8//! [`HttpClient::patch`] / [`HttpClient::delete`]. The raw
9//! [`reqwest::Client`] is deliberately hidden inside the [`HttpClient`]
10//! newtype with a private `inner` field — callers physically cannot reach
11//! the unchecked send methods.
12//! - Direct construction of `reqwest::Client` / `reqwest::ClientBuilder`
13//! is banned by the workspace-root `clippy.toml` `disallowed-methods` lint.
14//! The single legal construction site lives in [`client`] and carries a
15//! `#[allow(clippy::disallowed_methods)]` with justifying comment.
16//! - The allowlist is read from `REPOSIX_ALLOWED_ORIGINS` (comma-separated
17//! `scheme://host[:port]` patterns; port may be `*`). Default when unset
18//! or empty is `http://127.0.0.1:*,http://localhost:*`.
19//! - Redirects are not followed; callers who wish to follow a `Location`
20//! header MUST re-feed the target through [`HttpClient::request`], at
21//! which point the allowlist gate runs again.
22
23#![allow(clippy::module_name_repetitions)]
24
25use std::time::Duration;
26
27use reqwest::{IntoUrl, Method};
28use url::Url;
29
30use crate::error::{Error, Result};
31
/// Allowlist spec applied when `REPOSIX_ALLOWED_ORIGINS` is unset, empty, or
/// whitespace-only: plain-HTTP loopback (`127.0.0.1` and `localhost`) on any
/// port.
pub const DEFAULT_ALLOWLIST_RAW: &str = "http://127.0.0.1:*,http://localhost:*";

/// Name of the environment variable whose value, when set, replaces
/// [`DEFAULT_ALLOWLIST_RAW`] as the allowlist spec.
pub const ALLOWLIST_ENV_VAR: &str = "REPOSIX_ALLOWED_ORIGINS";
37
/// Knobs accepted by the HTTP client factory.
///
/// The [`Default`] implementation yields a 5-second total timeout and a
/// `reposix/<crate version>` User-Agent, which is what most callers want
/// (SG-07).
#[derive(Debug, Clone)]
pub struct ClientOpts {
    /// Upper bound on the duration of a whole request. Defaults to
    /// `Duration::from_secs(5)`.
    pub total_timeout: Duration,
    /// Value for the `User-Agent` header; `None` means the factory does not
    /// set one. Defaults to `Some("reposix/<crate version>")`.
    pub user_agent: Option<String>,
}
47
48impl Default for ClientOpts {
49 fn default() -> Self {
50 Self {
51 total_timeout: Duration::from_secs(5),
52 user_agent: Some(concat!("reposix/", env!("CARGO_PKG_VERSION")).to_owned()),
53 }
54 }
55}
56
/// One parsed allowlist entry: an origin pattern over scheme, host, and port.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct OriginGlob {
    /// URL scheme the candidate must carry.
    scheme: String,
    /// Host exactly as the `url` crate renders it (IPv6 literals keep their
    /// brackets, e.g. `[::1]`).
    host: String,
    /// Required port, or `None` for the `*` wildcard ("any port").
    port: Option<u16>,
}
64
65impl OriginGlob {
66 /// Returns true iff `url`'s scheme, host, and port all match this glob.
67 pub(crate) fn matches(&self, url: &Url) -> bool {
68 if url.scheme() != self.scheme {
69 return false;
70 }
71 let Some(url_host) = url.host_str() else {
72 return false;
73 };
74 if url_host != self.host {
75 return false;
76 }
77 match self.port {
78 None => true,
79 Some(expected) => url.port_or_known_default() == Some(expected),
80 }
81 }
82}
83
84/// Parse a comma-separated allowlist spec.
85///
86/// Grammar (v0.1 — see 01-CONTEXT.md): each entry is `scheme://host[:port]`
87/// where `scheme` is `http` or `https`, `host` is any valid URL host
88/// (including bracketed IPv6 literals), and `port` is either a u16 decimal
89/// or `*` (any port). Parsing is delegated to the `url` crate so bracketed
90/// IPv6 literals (`http://[::1]:7777`) work correctly. Leading/trailing
91/// whitespace per entry is trimmed. An empty or all-whitespace input yields
92/// the default allowlist.
93///
94/// # Errors
95/// Returns [`Error::Other`] if any entry fails to parse.
96pub(crate) fn parse_allowlist(raw: &str) -> Result<Vec<OriginGlob>> {
97 let trimmed = raw.trim();
98 if trimmed.is_empty() {
99 return parse_allowlist_inner(DEFAULT_ALLOWLIST_RAW);
100 }
101 parse_allowlist_inner(trimmed)
102}
103
104fn parse_allowlist_inner(raw: &str) -> Result<Vec<OriginGlob>> {
105 let mut out = Vec::new();
106 for (idx, entry) in raw.split(',').enumerate() {
107 let entry = entry.trim();
108 if entry.is_empty() {
109 return Err(Error::Other(format!(
110 "REPOSIX_ALLOWED_ORIGINS: entry {idx} is empty"
111 )));
112 }
113 out.push(parse_one(entry)?);
114 }
115 Ok(out)
116}
117
118fn parse_one(entry: &str) -> Result<OriginGlob> {
119 // Support the "port = *" wildcard by stripping it before URL parsing,
120 // since the `url` crate refuses a literal `*` in the port slot.
121 let (url_src, wildcard_port) = if let Some(stripped) = entry.strip_suffix(":*") {
122 (stripped.to_owned(), true)
123 } else {
124 (entry.to_owned(), false)
125 };
126
127 // Append a trailing `/` so `url::Url::parse` has a complete origin-style
128 // URL to chew on. Without it, bare `http://host` still parses, but
129 // requiring a path here makes the error message consistent.
130 let mut to_parse = url_src;
131 if !to_parse.ends_with('/') {
132 to_parse.push('/');
133 }
134
135 let parsed = Url::parse(&to_parse).map_err(|e| {
136 Error::Other(format!(
137 "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} failed to parse: {e}"
138 ))
139 })?;
140
141 let scheme = parsed.scheme().to_owned();
142 if scheme != "http" && scheme != "https" {
143 return Err(Error::Other(format!(
144 "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} scheme must be http or https"
145 )));
146 }
147
148 let Some(host) = parsed.host_str() else {
149 return Err(Error::Other(format!(
150 "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} has empty host"
151 )));
152 };
153 if host.is_empty() {
154 return Err(Error::Other(format!(
155 "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} has empty host"
156 )));
157 }
158
159 let port = if wildcard_port {
160 // Explicit `:*` — caller requested "any port".
161 None
162 } else {
163 // `url::Url::port()` strips a port that matches the scheme's default
164 // (e.g. `http://host:80` => `None`). To preserve the user's explicit
165 // intent ("http://127.0.0.1:80" means port 80, not wildcard) we fall
166 // back to `port_or_known_default()` when `.port()` is `None`. If the
167 // entry has no port suffix at all (e.g. `http://host`), neither is
168 // present and `port_or_known_default()` yields the scheme default,
169 // which is also the correct match semantic.
170 parsed.port_or_known_default()
171 };
172
173 Ok(OriginGlob {
174 scheme,
175 host: host.to_owned(),
176 port,
177 })
178}
179
180/// Load the allowlist from `REPOSIX_ALLOWED_ORIGINS`, falling back to the
181/// loopback-only default when unset/empty.
182///
183/// # Errors
184/// Returns [`Error::Other`] if the env var is set but un-parseable.
185pub(crate) fn load_allowlist_from_env() -> Result<Vec<OriginGlob>> {
186 match std::env::var(ALLOWLIST_ENV_VAR) {
187 Ok(v) => parse_allowlist(&v),
188 Err(_) => parse_allowlist(""),
189 }
190}
191
192/// Sealed HTTP client wrapper.
193///
194/// The internal [`reqwest::Client`] is deliberately private: callers have no
195/// way to obtain `&reqwest::Client` from an [`HttpClient`] (no `Deref`, no
196/// `AsRef`, no `inner_client()`), so they physically cannot invoke
197/// `client.get(url).send()` and bypass the allowlist. Every send goes
198/// through [`HttpClient::request`] (or one of the method-specific helpers),
199/// which re-parses `url` and re-checks it against `REPOSIX_ALLOWED_ORIGINS`.
200#[derive(Debug, Clone)]
201pub struct HttpClient {
202 inner: reqwest::Client,
203}
204
205impl HttpClient {
206 /// Send a `method` request to `url`, re-checking `url` against the
207 /// allowlist before any I/O.
208 ///
209 /// This is the hook callers MUST use after observing a 3xx: re-feed the
210 /// `Location` URL through [`HttpClient::request`] so the allowlist
211 /// recheck rejects redirect targets that escape the allowlist (SG-01
212 /// defence in depth).
213 ///
214 /// # Errors
215 /// Returns [`Error::InvalidOrigin`] if `url` fails to parse or its origin
216 /// does not match any allowlist entry. Returns [`Error::Other`] if
217 /// `REPOSIX_ALLOWED_ORIGINS` is set but un-parseable. Returns
218 /// [`Error::Http`] for transport-level failures from `reqwest`.
219 pub async fn request<U: IntoUrl>(&self, method: Method, url: U) -> Result<reqwest::Response> {
220 // Delegate to `request_with_headers` with an empty header slice. Keeping
221 // the single-method hot path lets the allowlist gate live in one place.
222 self.request_with_headers(method, url, &[]).await
223 }
224
225 /// Send a `method` request to `url` with extra headers attached in order,
226 /// re-checking `url` against the allowlist before any I/O.
227 ///
228 /// The allowlist gate fires BEFORE any header is attached and BEFORE any
229 /// socket work; a non-allowlisted origin returns [`Error::InvalidOrigin`]
230 /// without leaking header data to the network layer. Headers are attached
231 /// in order; duplicates are allowed and preserved (reqwest does not
232 /// dedupe).
233 ///
234 /// This is the hook callers MUST use after observing a 3xx: re-feed the
235 /// `Location` URL through [`HttpClient::request_with_headers`] (or the
236 /// zero-header [`HttpClient::request`] wrapper) so the allowlist recheck
237 /// rejects redirect targets that escape the allowlist (SG-01 defence in
238 /// depth).
239 ///
240 /// # Errors
241 /// Returns [`Error::InvalidOrigin`] if `url` fails to parse or its origin
242 /// does not match any allowlist entry. Returns [`Error::Other`] if
243 /// `REPOSIX_ALLOWED_ORIGINS` is set but un-parseable. Returns
244 /// [`Error::Http`] for transport-level failures from `reqwest`.
245 pub async fn request_with_headers<U: IntoUrl>(
246 &self,
247 method: Method,
248 url: U,
249 headers: &[(&str, &str)],
250 ) -> Result<reqwest::Response> {
251 self.request_with_headers_and_body(method, url, headers, None::<&[u8]>)
252 .await
253 }
254
255 /// Send a `method` request with both headers AND an optional request
256 /// body, re-checking `url` against the allowlist before any I/O.
257 ///
258 /// Callers pass `None` (inferred via `None::<&[u8]>` or `None::<Vec<u8>>`)
259 /// for verbs that never carry bodies (`GET`, `DELETE`). `Some(body)`
260 /// attaches the bytes and `Content-Length` automatically via reqwest.
261 /// The allowlist gate fires BEFORE body serialization, so a non-
262 /// allowlisted origin never leaks body bytes to the network layer.
263 ///
264 /// # Errors
265 /// Same conditions as [`HttpClient::request_with_headers`].
266 pub async fn request_with_headers_and_body<U, B>(
267 &self,
268 method: Method,
269 url: U,
270 headers: &[(&str, &str)],
271 body: Option<B>,
272 ) -> Result<reqwest::Response>
273 where
274 U: IntoUrl,
275 B: Into<reqwest::Body>,
276 {
277 let parsed = url
278 .into_url()
279 .map_err(|e| Error::InvalidOrigin(format!("{e}")))?;
280 let allowlist = load_allowlist_from_env()?;
281 if !allowlist.iter().any(|g| g.matches(&parsed)) {
282 return Err(Error::InvalidOrigin(parsed.to_string()));
283 }
284 let mut builder = self.inner.request(method, parsed);
285 for (k, v) in headers {
286 builder = builder.header(*k, *v);
287 }
288 if let Some(body) = body {
289 builder = builder.body(body);
290 }
291 let resp = builder.send().await?;
292 Ok(resp)
293 }
294
295 /// Convenience wrapper for `GET url`.
296 ///
297 /// # Errors
298 /// Same as [`HttpClient::request`].
299 pub async fn get<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
300 self.request(Method::GET, url).await
301 }
302
303 /// Convenience wrapper for `POST url`.
304 ///
305 /// # Errors
306 /// Same as [`HttpClient::request`].
307 pub async fn post<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
308 self.request(Method::POST, url).await
309 }
310
311 /// Convenience wrapper for `PATCH url`.
312 ///
313 /// # Errors
314 /// Same as [`HttpClient::request`].
315 pub async fn patch<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
316 self.request(Method::PATCH, url).await
317 }
318
319 /// Convenience wrapper for `DELETE url`.
320 ///
321 /// # Errors
322 /// Same as [`HttpClient::request`].
323 pub async fn delete<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
324 self.request(Method::DELETE, url).await
325 }
326}
327
328// Intentionally NOT implemented:
329//
330// impl HttpClient { pub fn inner_client(&self) -> &reqwest::Client { ... } }
331// impl AsRef<reqwest::Client> for HttpClient { ... }
332// impl Deref for HttpClient { type Target = reqwest::Client; ... }
333//
334// These would defeat SG-01: a caller could pull the raw client out and call
335// `raw.get(url).send()` to bypass the allowlist gate. The compile-fail
336// fixture at `tests/compile-fail/http_client_inner_not_pub.rs` locks this.
337
338/// Build the one-and-only legal HTTP client for this workspace.
339///
340/// The returned [`HttpClient`] has redirects disabled and a 5-second total
341/// timeout (or whatever `opts.total_timeout` is). Callers MUST route every
342/// send through [`HttpClient::request`] so the per-request allowlist
343/// recheck runs — the factory alone is not a sufficient gate because
344/// callers can override the target URL at send time.
345///
346/// # Errors
347/// Returns [`Error::Other`] if `REPOSIX_ALLOWED_ORIGINS` is set but
348/// un-parseable, or [`Error::Http`] if `reqwest` itself refuses to build the
349/// client (e.g. a TLS-backend initialisation failure).
350pub fn client(opts: ClientOpts) -> Result<HttpClient> {
351 // Surface allowlist-parse errors at construction time so misconfigured
352 // operators fail loudly rather than silently.
353 let _ = load_allowlist_from_env()?;
354
355 // SG-01: this `#[allow]` marks the single legal construction site in the
356 // workspace. Any other construction of `reqwest::Client` / `ClientBuilder`
357 // is rejected by the workspace-root `clippy.toml` `disallowed-methods`
358 // lint. Do not remove this comment without updating that lint.
359 #[allow(clippy::disallowed_methods)]
360 let mut builder = reqwest::ClientBuilder::new();
361
362 builder = builder
363 .redirect(reqwest::redirect::Policy::none())
364 .timeout(opts.total_timeout);
365 if let Some(ua) = opts.user_agent {
366 builder = builder.user_agent(ua);
367 }
368 let inner = builder.build()?;
369 Ok(HttpClient { inner })
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `spec` and return its first entry, panicking if parsing fails.
    fn first_glob(spec: &str) -> OriginGlob {
        parse_allowlist(spec).unwrap().remove(0)
    }

    /// Parse a URL that the test author knows to be valid.
    fn url(raw: &str) -> Url {
        Url::parse(raw).unwrap()
    }

    /// Assert that `spec` is refused with `Error::Other`.
    fn assert_rejected(spec: &str) {
        let outcome = parse_allowlist(spec);
        assert!(matches!(outcome, Err(Error::Other(_))), "got {outcome:?}");
    }

    #[test]
    fn client_opts_default_is_5s_timeout() {
        let ClientOpts {
            total_timeout,
            user_agent,
        } = ClientOpts::default();
        assert_eq!(total_timeout, Duration::from_secs(5));
        assert!(user_agent.unwrap().starts_with("reposix/"));
    }

    #[test]
    fn parse_allowlist_default_has_two_entries() {
        let parsed = parse_allowlist("http://127.0.0.1:*,http://localhost:*").unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn parse_allowlist_empty_input_returns_default() {
        let parsed = parse_allowlist("").unwrap();
        let hosts: Vec<&str> = parsed.iter().map(|g| g.host.as_str()).collect();
        assert_eq!(hosts.len(), 2);
        assert!(hosts.contains(&"127.0.0.1"));
        assert!(hosts.contains(&"localhost"));
    }

    #[test]
    fn parse_allowlist_whitespace_only_returns_default() {
        assert_eq!(parse_allowlist("  \t  ").unwrap().len(), 2);
    }

    #[test]
    fn parse_allowlist_bad_input_errors() {
        assert_rejected("not a url");
    }

    #[test]
    fn parse_allowlist_bad_scheme_errors() {
        assert_rejected("ftp://127.0.0.1:*");
    }

    #[test]
    fn parse_allowlist_empty_host_errors() {
        assert_rejected("http://:80");
    }

    #[test]
    fn parse_allowlist_bad_port_errors() {
        assert_rejected("http://127.0.0.1:notaport");
    }

    #[test]
    fn origin_glob_matches_loopback_any_port() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(glob.matches(&url("http://127.0.0.1:7878")));
    }

    #[test]
    fn origin_glob_rejects_https_when_http_configured() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(!glob.matches(&url("https://127.0.0.1:7878")));
    }

    #[test]
    fn origin_glob_rejects_non_loopback_host() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(!glob.matches(&url("http://evil.example:80")));
    }

    #[test]
    fn origin_glob_matches_exact_port() {
        let glob = first_glob("http://127.0.0.1:80");
        assert!(glob.matches(&url("http://127.0.0.1:80")));
    }

    #[test]
    fn origin_glob_rejects_wrong_exact_port() {
        let glob = first_glob("http://127.0.0.1:80");
        assert!(!glob.matches(&url("http://127.0.0.1:81")));
    }

    // L-02: IPv6 literal allowlist support.

    #[test]
    fn parse_allowlist_accepts_ipv6_with_explicit_port() {
        let parsed = parse_allowlist("http://[::1]:7777").unwrap();
        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.host, "[::1]");
        assert_eq!(only.port, Some(7777));
    }

    #[test]
    fn parse_allowlist_accepts_ipv6_with_wildcard_port() {
        let parsed = parse_allowlist("http://[::1]:*").unwrap();
        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.host, "[::1]");
        assert_eq!(only.port, None);
    }

    #[test]
    fn origin_glob_matches_ipv6_loopback_any_port() {
        let glob = first_glob("http://[::1]:*");
        assert!(glob.matches(&url("http://[::1]:7777/")));
    }

    #[test]
    fn origin_glob_matches_ipv6_loopback_exact_port() {
        let glob = first_glob("http://[::1]:7777");
        assert!(glob.matches(&url("http://[::1]:7777/")));
    }

    #[test]
    fn origin_glob_ipv6_rejects_wrong_port() {
        let glob = first_glob("http://[::1]:7777");
        assert!(!glob.matches(&url("http://[::1]:7778/")));
    }

    #[test]
    fn parse_allowlist_localhost_wildcard_still_parses() {
        let parsed = parse_allowlist("https://localhost:*").unwrap();
        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.scheme, "https");
        assert_eq!(only.host, "localhost");
        assert_eq!(only.port, None);
    }
}
515}