Skip to main content

reposix_core/
http.rs

//! The one legal HTTP client factory + per-request allowlist gate.
//!
//! Security contract (see `.planning/PROJECT.md` SG-01 and SG-07):
//!
//! - Every outbound HTTP call in this workspace MUST go through
//!   [`client`] to build an [`HttpClient`] and then call one of
//!   [`HttpClient::request`] / [`HttpClient::request_with_headers`] /
//!   [`HttpClient::request_with_headers_and_body`] / [`HttpClient::get`] /
//!   [`HttpClient::post`] / [`HttpClient::patch`] / [`HttpClient::delete`].
//!   The raw [`reqwest::Client`] is deliberately hidden inside the
//!   [`HttpClient`] newtype with a private `inner` field — callers physically
//!   cannot reach the unchecked send methods.
//! - Direct construction of `reqwest::Client` / `reqwest::ClientBuilder`
//!   is banned by the workspace-root `clippy.toml` `disallowed-methods` lint.
//!   The single legal construction site lives in [`client`] and carries a
//!   `#[allow(clippy::disallowed_methods)]` with a justifying comment.
//! - The allowlist is read from `REPOSIX_ALLOWED_ORIGINS` (comma-separated
//!   `scheme://host[:port]` patterns; port may be `*`). The default when the
//!   variable is unset or empty is `http://127.0.0.1:*,http://localhost:*`.
//! - Redirects are not followed; callers who wish to follow a `Location`
//!   header MUST re-feed the target through [`HttpClient::request`] (or one
//!   of its sibling send methods), at which point the allowlist gate runs
//!   again.
22
23#![allow(clippy::module_name_repetitions)]
24
25use std::time::Duration;
26
27use reqwest::{IntoUrl, Method};
28use url::Url;
29
30use crate::error::{Error, Result};
31
32/// Default allowlist when `REPOSIX_ALLOWED_ORIGINS` is unset or empty.
33pub const DEFAULT_ALLOWLIST_RAW: &str = "http://127.0.0.1:*,http://localhost:*";
34
35/// Environment variable that, when set, overrides the default allowlist.
36pub const ALLOWLIST_ENV_VAR: &str = "REPOSIX_ALLOWED_ORIGINS";
37
/// Knobs accepted by [`client`] when building an [`HttpClient`].
///
/// [`Default`] yields the configuration most callers want (SG-07): a
/// 5-second total timeout and a `reposix/<crate version>` User-Agent.
#[derive(Clone, Debug)]
pub struct ClientOpts {
    /// Total request timeout. Default `Duration::from_secs(5)`.
    pub total_timeout: Duration,
    /// User-Agent header value; `None` means [`client`] does not set one on
    /// the builder. Default `Some("reposix/<CARGO_PKG_VERSION>")`.
    pub user_agent: Option<String>,
}
47
48impl Default for ClientOpts {
49    fn default() -> Self {
50        Self {
51            total_timeout: Duration::from_secs(5),
52            user_agent: Some(concat!("reposix/", env!("CARGO_PKG_VERSION")).to_owned()),
53        }
54    }
55}
56
/// One parsed allowlist pattern: scheme, host, and an optional port.
///
/// A `port` of `None` encodes the `*` wildcard, i.e. "any port".
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct OriginGlob {
    /// URL scheme the target must use (`parse_one` only admits `http` and
    /// `https`).
    scheme: String,
    /// Host as reported by `Url::host_str`; IPv6 literals keep their
    /// brackets (e.g. `[::1]`).
    host: String,
    /// Exact port required, or `None` for "any port".
    port: Option<u16>,
}
64
65impl OriginGlob {
66    /// Returns true iff `url`'s scheme, host, and port all match this glob.
67    pub(crate) fn matches(&self, url: &Url) -> bool {
68        if url.scheme() != self.scheme {
69            return false;
70        }
71        let Some(url_host) = url.host_str() else {
72            return false;
73        };
74        if url_host != self.host {
75            return false;
76        }
77        match self.port {
78            None => true,
79            Some(expected) => url.port_or_known_default() == Some(expected),
80        }
81    }
82}
83
84/// Parse a comma-separated allowlist spec.
85///
86/// Grammar (v0.1 — see 01-CONTEXT.md): each entry is `scheme://host[:port]`
87/// where `scheme` is `http` or `https`, `host` is any valid URL host
88/// (including bracketed IPv6 literals), and `port` is either a u16 decimal
89/// or `*` (any port). Parsing is delegated to the `url` crate so bracketed
90/// IPv6 literals (`http://[::1]:7777`) work correctly. Leading/trailing
91/// whitespace per entry is trimmed. An empty or all-whitespace input yields
92/// the default allowlist.
93///
94/// # Errors
95/// Returns [`Error::Other`] if any entry fails to parse.
96pub(crate) fn parse_allowlist(raw: &str) -> Result<Vec<OriginGlob>> {
97    let trimmed = raw.trim();
98    if trimmed.is_empty() {
99        return parse_allowlist_inner(DEFAULT_ALLOWLIST_RAW);
100    }
101    parse_allowlist_inner(trimmed)
102}
103
104fn parse_allowlist_inner(raw: &str) -> Result<Vec<OriginGlob>> {
105    let mut out = Vec::new();
106    for (idx, entry) in raw.split(',').enumerate() {
107        let entry = entry.trim();
108        if entry.is_empty() {
109            return Err(Error::Other(format!(
110                "REPOSIX_ALLOWED_ORIGINS: entry {idx} is empty"
111            )));
112        }
113        out.push(parse_one(entry)?);
114    }
115    Ok(out)
116}
117
118fn parse_one(entry: &str) -> Result<OriginGlob> {
119    // Support the "port = *" wildcard by stripping it before URL parsing,
120    // since the `url` crate refuses a literal `*` in the port slot.
121    let (url_src, wildcard_port) = if let Some(stripped) = entry.strip_suffix(":*") {
122        (stripped.to_owned(), true)
123    } else {
124        (entry.to_owned(), false)
125    };
126
127    // Append a trailing `/` so `url::Url::parse` has a complete origin-style
128    // URL to chew on. Without it, bare `http://host` still parses, but
129    // requiring a path here makes the error message consistent.
130    let mut to_parse = url_src;
131    if !to_parse.ends_with('/') {
132        to_parse.push('/');
133    }
134
135    let parsed = Url::parse(&to_parse).map_err(|e| {
136        Error::Other(format!(
137            "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} failed to parse: {e}"
138        ))
139    })?;
140
141    let scheme = parsed.scheme().to_owned();
142    if scheme != "http" && scheme != "https" {
143        return Err(Error::Other(format!(
144            "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} scheme must be http or https"
145        )));
146    }
147
148    let Some(host) = parsed.host_str() else {
149        return Err(Error::Other(format!(
150            "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} has empty host"
151        )));
152    };
153    if host.is_empty() {
154        return Err(Error::Other(format!(
155            "REPOSIX_ALLOWED_ORIGINS: entry {entry:?} has empty host"
156        )));
157    }
158
159    let port = if wildcard_port {
160        // Explicit `:*` — caller requested "any port".
161        None
162    } else {
163        // `url::Url::port()` strips a port that matches the scheme's default
164        // (e.g. `http://host:80` => `None`). To preserve the user's explicit
165        // intent ("http://127.0.0.1:80" means port 80, not wildcard) we fall
166        // back to `port_or_known_default()` when `.port()` is `None`. If the
167        // entry has no port suffix at all (e.g. `http://host`), neither is
168        // present and `port_or_known_default()` yields the scheme default,
169        // which is also the correct match semantic.
170        parsed.port_or_known_default()
171    };
172
173    Ok(OriginGlob {
174        scheme,
175        host: host.to_owned(),
176        port,
177    })
178}
179
180/// Load the allowlist from `REPOSIX_ALLOWED_ORIGINS`, falling back to the
181/// loopback-only default when unset/empty.
182///
183/// # Errors
184/// Returns [`Error::Other`] if the env var is set but un-parseable.
185pub(crate) fn load_allowlist_from_env() -> Result<Vec<OriginGlob>> {
186    match std::env::var(ALLOWLIST_ENV_VAR) {
187        Ok(v) => parse_allowlist(&v),
188        Err(_) => parse_allowlist(""),
189    }
190}
191
192/// Sealed HTTP client wrapper.
193///
194/// The internal [`reqwest::Client`] is deliberately private: callers have no
195/// way to obtain `&reqwest::Client` from an [`HttpClient`] (no `Deref`, no
196/// `AsRef`, no `inner_client()`), so they physically cannot invoke
197/// `client.get(url).send()` and bypass the allowlist. Every send goes
198/// through [`HttpClient::request`] (or one of the method-specific helpers),
199/// which re-parses `url` and re-checks it against `REPOSIX_ALLOWED_ORIGINS`.
200#[derive(Debug, Clone)]
201pub struct HttpClient {
202    inner: reqwest::Client,
203}
204
205impl HttpClient {
206    /// Send a `method` request to `url`, re-checking `url` against the
207    /// allowlist before any I/O.
208    ///
209    /// This is the hook callers MUST use after observing a 3xx: re-feed the
210    /// `Location` URL through [`HttpClient::request`] so the allowlist
211    /// recheck rejects redirect targets that escape the allowlist (SG-01
212    /// defence in depth).
213    ///
214    /// # Errors
215    /// Returns [`Error::InvalidOrigin`] if `url` fails to parse or its origin
216    /// does not match any allowlist entry. Returns [`Error::Other`] if
217    /// `REPOSIX_ALLOWED_ORIGINS` is set but un-parseable. Returns
218    /// [`Error::Http`] for transport-level failures from `reqwest`.
219    pub async fn request<U: IntoUrl>(&self, method: Method, url: U) -> Result<reqwest::Response> {
220        // Delegate to `request_with_headers` with an empty header slice. Keeping
221        // the single-method hot path lets the allowlist gate live in one place.
222        self.request_with_headers(method, url, &[]).await
223    }
224
225    /// Send a `method` request to `url` with extra headers attached in order,
226    /// re-checking `url` against the allowlist before any I/O.
227    ///
228    /// The allowlist gate fires BEFORE any header is attached and BEFORE any
229    /// socket work; a non-allowlisted origin returns [`Error::InvalidOrigin`]
230    /// without leaking header data to the network layer. Headers are attached
231    /// in order; duplicates are allowed and preserved (reqwest does not
232    /// dedupe).
233    ///
234    /// This is the hook callers MUST use after observing a 3xx: re-feed the
235    /// `Location` URL through [`HttpClient::request_with_headers`] (or the
236    /// zero-header [`HttpClient::request`] wrapper) so the allowlist recheck
237    /// rejects redirect targets that escape the allowlist (SG-01 defence in
238    /// depth).
239    ///
240    /// # Errors
241    /// Returns [`Error::InvalidOrigin`] if `url` fails to parse or its origin
242    /// does not match any allowlist entry. Returns [`Error::Other`] if
243    /// `REPOSIX_ALLOWED_ORIGINS` is set but un-parseable. Returns
244    /// [`Error::Http`] for transport-level failures from `reqwest`.
245    pub async fn request_with_headers<U: IntoUrl>(
246        &self,
247        method: Method,
248        url: U,
249        headers: &[(&str, &str)],
250    ) -> Result<reqwest::Response> {
251        self.request_with_headers_and_body(method, url, headers, None::<&[u8]>)
252            .await
253    }
254
255    /// Send a `method` request with both headers AND an optional request
256    /// body, re-checking `url` against the allowlist before any I/O.
257    ///
258    /// Callers pass `None` (inferred via `None::<&[u8]>` or `None::<Vec<u8>>`)
259    /// for verbs that never carry bodies (`GET`, `DELETE`). `Some(body)`
260    /// attaches the bytes and `Content-Length` automatically via reqwest.
261    /// The allowlist gate fires BEFORE body serialization, so a non-
262    /// allowlisted origin never leaks body bytes to the network layer.
263    ///
264    /// # Errors
265    /// Same conditions as [`HttpClient::request_with_headers`].
266    pub async fn request_with_headers_and_body<U, B>(
267        &self,
268        method: Method,
269        url: U,
270        headers: &[(&str, &str)],
271        body: Option<B>,
272    ) -> Result<reqwest::Response>
273    where
274        U: IntoUrl,
275        B: Into<reqwest::Body>,
276    {
277        let parsed = url
278            .into_url()
279            .map_err(|e| Error::InvalidOrigin(format!("{e}")))?;
280        let allowlist = load_allowlist_from_env()?;
281        if !allowlist.iter().any(|g| g.matches(&parsed)) {
282            return Err(Error::InvalidOrigin(parsed.to_string()));
283        }
284        let mut builder = self.inner.request(method, parsed);
285        for (k, v) in headers {
286            builder = builder.header(*k, *v);
287        }
288        if let Some(body) = body {
289            builder = builder.body(body);
290        }
291        let resp = builder.send().await?;
292        Ok(resp)
293    }
294
295    /// Convenience wrapper for `GET url`.
296    ///
297    /// # Errors
298    /// Same as [`HttpClient::request`].
299    pub async fn get<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
300        self.request(Method::GET, url).await
301    }
302
303    /// Convenience wrapper for `POST url`.
304    ///
305    /// # Errors
306    /// Same as [`HttpClient::request`].
307    pub async fn post<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
308        self.request(Method::POST, url).await
309    }
310
311    /// Convenience wrapper for `PATCH url`.
312    ///
313    /// # Errors
314    /// Same as [`HttpClient::request`].
315    pub async fn patch<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
316        self.request(Method::PATCH, url).await
317    }
318
319    /// Convenience wrapper for `DELETE url`.
320    ///
321    /// # Errors
322    /// Same as [`HttpClient::request`].
323    pub async fn delete<U: IntoUrl>(&self, url: U) -> Result<reqwest::Response> {
324        self.request(Method::DELETE, url).await
325    }
326}
327
328// Intentionally NOT implemented:
329//
330//   impl HttpClient { pub fn inner_client(&self) -> &reqwest::Client { ... } }
331//   impl AsRef<reqwest::Client> for HttpClient { ... }
332//   impl Deref for HttpClient { type Target = reqwest::Client; ... }
333//
334// These would defeat SG-01: a caller could pull the raw client out and call
335// `raw.get(url).send()` to bypass the allowlist gate. The compile-fail
336// fixture at `tests/compile-fail/http_client_inner_not_pub.rs` locks this.
337
338/// Build the one-and-only legal HTTP client for this workspace.
339///
340/// The returned [`HttpClient`] has redirects disabled and a 5-second total
341/// timeout (or whatever `opts.total_timeout` is). Callers MUST route every
342/// send through [`HttpClient::request`] so the per-request allowlist
343/// recheck runs — the factory alone is not a sufficient gate because
344/// callers can override the target URL at send time.
345///
346/// # Errors
347/// Returns [`Error::Other`] if `REPOSIX_ALLOWED_ORIGINS` is set but
348/// un-parseable, or [`Error::Http`] if `reqwest` itself refuses to build the
349/// client (e.g. a TLS-backend initialisation failure).
350pub fn client(opts: ClientOpts) -> Result<HttpClient> {
351    // Surface allowlist-parse errors at construction time so misconfigured
352    // operators fail loudly rather than silently.
353    let _ = load_allowlist_from_env()?;
354
355    // SG-01: this `#[allow]` marks the single legal construction site in the
356    // workspace. Any other construction of `reqwest::Client` / `ClientBuilder`
357    // is rejected by the workspace-root `clippy.toml` `disallowed-methods`
358    // lint. Do not remove this comment without updating that lint.
359    #[allow(clippy::disallowed_methods)]
360    let mut builder = reqwest::ClientBuilder::new();
361
362    builder = builder
363        .redirect(reqwest::redirect::Policy::none())
364        .timeout(opts.total_timeout);
365    if let Some(ua) = opts.user_agent {
366        builder = builder.user_agent(ua);
367    }
368    let inner = builder.build()?;
369    Ok(HttpClient { inner })
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `spec` and returns its first glob; panics if parsing fails.
    fn first_glob(spec: &str) -> OriginGlob {
        parse_allowlist(spec)
            .unwrap()
            .into_iter()
            .next()
            .expect("allowlist spec produced no entries")
    }

    /// Parses `s` as a request target; panics if it is not a valid URL.
    fn target(s: &str) -> Url {
        Url::parse(s).unwrap()
    }

    #[test]
    fn client_opts_default_is_5s_timeout() {
        let opts = ClientOpts::default();
        assert_eq!(opts.total_timeout, Duration::from_secs(5));
        assert!(opts.user_agent.as_deref().unwrap().starts_with("reposix/"));
    }

    #[test]
    fn parse_allowlist_default_has_two_entries() {
        // Parse the constant itself so a malformed default cannot slip by.
        let entries = parse_allowlist(DEFAULT_ALLOWLIST_RAW).unwrap();
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn parse_allowlist_empty_input_returns_default() {
        let entries = parse_allowlist("").unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.iter().any(|g| g.host == "127.0.0.1"));
        assert!(entries.iter().any(|g| g.host == "localhost"));
    }

    #[test]
    fn parse_allowlist_whitespace_only_returns_default() {
        let entries = parse_allowlist("   \t  ").unwrap();
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn parse_allowlist_bad_input_errors() {
        let err = parse_allowlist("not a url").unwrap_err();
        assert!(matches!(err, Error::Other(_)), "got {err:?}");
    }

    #[test]
    fn parse_allowlist_bad_scheme_errors() {
        assert!(matches!(
            parse_allowlist("ftp://127.0.0.1:*"),
            Err(Error::Other(_))
        ));
    }

    #[test]
    fn parse_allowlist_empty_host_errors() {
        assert!(matches!(
            parse_allowlist("http://:80"),
            Err(Error::Other(_))
        ));
    }

    #[test]
    fn parse_allowlist_bad_port_errors() {
        assert!(matches!(
            parse_allowlist("http://127.0.0.1:notaport"),
            Err(Error::Other(_))
        ));
    }

    #[test]
    fn parse_allowlist_trailing_comma_errors() {
        // A trailing comma produces an empty second entry, which must be
        // rejected rather than ignored.
        let err = parse_allowlist("http://127.0.0.1:*,").unwrap_err();
        assert!(
            matches!(&err, Error::Other(msg) if msg.contains("entry 1 is empty")),
            "got {err:?}"
        );
    }

    #[test]
    fn origin_glob_matches_loopback_any_port() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(glob.matches(&target("http://127.0.0.1:7878")));
    }

    #[test]
    fn origin_glob_rejects_https_when_http_configured() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(!glob.matches(&target("https://127.0.0.1:7878")));
    }

    #[test]
    fn origin_glob_rejects_non_loopback_host() {
        let glob = first_glob("http://127.0.0.1:*");
        assert!(!glob.matches(&target("http://evil.example:80")));
    }

    #[test]
    fn origin_glob_matches_exact_port() {
        let glob = first_glob("http://127.0.0.1:80");
        assert!(glob.matches(&target("http://127.0.0.1:80")));
    }

    #[test]
    fn origin_glob_rejects_wrong_exact_port() {
        let glob = first_glob("http://127.0.0.1:80");
        assert!(!glob.matches(&target("http://127.0.0.1:81")));
    }

    #[test]
    fn origin_glob_without_port_pins_scheme_default() {
        // An entry with no port suffix is pinned to the scheme's default
        // port; it is NOT a wildcard.
        let glob = first_glob("https://example.com");
        assert_eq!(glob.port, Some(443));
        assert!(glob.matches(&target("https://example.com/path")));
        assert!(!glob.matches(&target("https://example.com:8443/path")));
    }

    // L-02: IPv6 literal allowlist support.

    #[test]
    fn parse_allowlist_accepts_ipv6_with_explicit_port() {
        let entries = parse_allowlist("http://[::1]:7777").unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].host, "[::1]");
        assert_eq!(entries[0].port, Some(7777));
    }

    #[test]
    fn parse_allowlist_accepts_ipv6_with_wildcard_port() {
        let entries = parse_allowlist("http://[::1]:*").unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].host, "[::1]");
        assert_eq!(entries[0].port, None);
    }

    #[test]
    fn origin_glob_matches_ipv6_loopback_any_port() {
        let glob = first_glob("http://[::1]:*");
        assert!(glob.matches(&target("http://[::1]:7777/")));
    }

    #[test]
    fn origin_glob_matches_ipv6_loopback_exact_port() {
        let glob = first_glob("http://[::1]:7777");
        assert!(glob.matches(&target("http://[::1]:7777/")));
    }

    #[test]
    fn origin_glob_ipv6_rejects_wrong_port() {
        let glob = first_glob("http://[::1]:7777");
        assert!(!glob.matches(&target("http://[::1]:7778/")));
    }

    #[test]
    fn parse_allowlist_localhost_wildcard_still_parses() {
        let entries = parse_allowlist("https://localhost:*").unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].scheme, "https");
        assert_eq!(entries[0].host, "localhost");
        assert_eq!(entries[0].port, None);
    }
}