Skip to main content

linesmith_core/data_context/
fetcher.rs

1//! OAuth `/api/oauth/usage` HTTP client.
2//!
3//! Lowest layer of the rate-limit data pipeline. Talks HTTP to the
4//! Anthropic endpoint, classifies the response, and returns a parsed
5//! [`UsageApiResponse`] on success. Retry / stale-serve / JSONL
6//! fallback behavior lives in the orchestrator above this layer;
7//! this module has no knowledge of the usage-data cache stack.
8//! (The `ureq::Agent` held by [`UreqTransport`] maintains an internal
9//! connection pool — that's a transport-level concern, separate from
10//! the response cache.)
11//!
12//! Canonical spec: `docs/adrs/0011-rate-limit-data-source.md`
13//! §Endpoint contract and §Cache stack (Retry-After rules).
14
15use std::io;
16use std::time::{Duration, SystemTime};
17
18use super::credentials::Credentials;
19use super::errors::UsageError;
20use super::usage::UsageApiResponse;
21
/// Name of the forward-compat header Anthropic currently requires for
/// the OAuth usage endpoint.
const ANTHROPIC_BETA_HEADER: &str = "anthropic-beta";
/// Value sent with [`ANTHROPIC_BETA_HEADER`]. When this rotates we bump
/// the value here.
const ANTHROPIC_BETA_VALUE: &str = "oauth-2025-04-20";

/// Endpoint path appended to the configured `usage.api_base_url`.
/// Starts with `/`; the base URL's trailing slashes are stripped before
/// the two are joined.
pub const OAUTH_USAGE_PATH: &str = "/api/oauth/usage";

/// Default per-request timeout (2 seconds) per ADR-0011 §Endpoint
/// contract.
pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(2);

/// Fallback backoff for 429 responses that omit the `Retry-After`
/// header, or carry one that cannot be parsed. ADR-0011 §Cache stack
/// specifies 300s.
const DEFAULT_RATE_LIMIT_BACKOFF: Duration = Duration::from_secs(300);

/// Upper bound (24 hours) on parsed `Retry-After` values. A malformed
/// or pathological header like `Retry-After: 18446744073709551615`
/// would otherwise produce a Duration the orchestrator can't
/// meaningfully compare with a cache TTL.
const MAX_RETRY_AFTER: Duration = Duration::from_secs(24 * 60 * 60);

/// Cap, in bytes, on the response body we'll read. The live endpoint
/// emits roughly 500 bytes; 64 KiB is two orders of magnitude headroom
/// without letting a misbehaving (or MITM'd) server OOM us.
const MAX_RESPONSE_BYTES: u64 = 64 * 1024;
47
48// --- Transport trait ----------------------------------------------------
49
/// Raw HTTP response — enough for [`fetch_usage`] to classify the
/// outcome without leaking the HTTP crate to callers.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so transports, fakes
/// and diagnostics can print, duplicate and compare responses without
/// hand-copying every field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HttpResponse {
    /// Numeric HTTP status code exactly as the transport received it.
    pub status: u16,
    /// Raw response body bytes. Nothing is decoded at this layer.
    pub body: Vec<u8>,
    /// Verbatim `Retry-After` header value if present. Parsing lives
    /// in [`parse_retry_after`] so the transport stays dumb.
    pub retry_after: Option<String>,
}
59
/// Injected HTTP surface. [`UreqTransport`] is the default; tests use
/// a fake to exercise status-handling paths without real I/O. Errors
/// carry [`io::ErrorKind::TimedOut`] for timeouts; anything else is
/// treated as a generic network failure in [`fetch_usage`].
pub trait UsageTransport {
    /// Perform one `GET` against `url`, authenticating with `token` and
    /// honoring `timeout`.
    ///
    /// Implementations return `Ok` for every response that produced an
    /// HTTP status — including 4xx/5xx — because [`fetch_usage`] does
    /// the status classification itself. `Err` is reserved for failures
    /// where no usable response was obtained.
    fn get(&self, url: &str, token: &str, timeout: Duration) -> io::Result<HttpResponse>;
}
67
68// --- Public entry point -------------------------------------------------
69
70/// Fetch usage data for the given credentials. Maps transport errors
71/// and HTTP status codes onto the [`UsageError`] taxonomy per
72/// `docs/specs/rate-limit-segments.md` §Error message table.
73pub fn fetch_usage(
74    transport: &dyn UsageTransport,
75    base_url: &str,
76    creds: &Credentials,
77    timeout: Duration,
78) -> Result<UsageApiResponse, UsageError> {
79    let url = build_url(base_url);
80    match transport.get(&url, creds.token(), timeout) {
81        Ok(resp) => interpret_status(resp),
82        Err(e) if e.kind() == io::ErrorKind::TimedOut => Err(UsageError::Timeout),
83        Err(_) => Err(UsageError::NetworkError),
84    }
85}
86
87/// Strip a trailing slash from `base_url` before appending the fixed
88/// path so both `"https://api.anthropic.com"` and
89/// `"https://api.anthropic.com/"` yield a single-slash URL.
90fn build_url(base_url: &str) -> String {
91    let trimmed = base_url.trim_end_matches('/');
92    format!("{trimmed}{OAUTH_USAGE_PATH}")
93}
94
95fn interpret_status(resp: HttpResponse) -> Result<UsageApiResponse, UsageError> {
96    match resp.status {
97        200..=299 => serde_json::from_slice(&resp.body).map_err(|_| UsageError::ParseError),
98        401 => Err(UsageError::Unauthorized),
99        429 => {
100            let retry_after = resp
101                .retry_after
102                .as_deref()
103                .and_then(parse_retry_after)
104                .or(Some(DEFAULT_RATE_LIMIT_BACKOFF));
105            Err(UsageError::RateLimited { retry_after })
106        }
107        // 3xx (when redirects are disabled or exhausted) and 5xx fall
108        // here. ADR-0011 doesn't carve out a distinct variant for
109        // server errors; `NetworkError` triggers the stale-cache
110        // fallback path in the orchestrator, which is the intended
111        // behavior for transient server failures.
112        _ => Err(UsageError::NetworkError),
113    }
114}
115
116/// Parse a `Retry-After` header value. Accepts the two RFC 9110
117/// §10.2.3 forms (RFC 7231 §7.1.3 superseded): integer seconds
118/// (`"120"`) and HTTP-date (`"Fri, 31 Dec 2026 23:59:59 GMT"`).
119/// Past-dated values and values the `httpdate` crate rejects fall
120/// back to `None`; the caller then applies
121/// [`DEFAULT_RATE_LIMIT_BACKOFF`]. Values above [`MAX_RETRY_AFTER`]
122/// are capped so a malformed header can't produce a Duration the
123/// orchestrator treats as "never retry."
124fn parse_retry_after(raw: &str) -> Option<Duration> {
125    let raw = raw.trim();
126    let parsed = if let Ok(secs) = raw.parse::<u64>() {
127        Some(Duration::from_secs(secs))
128    } else {
129        let when = httpdate::parse_http_date(raw).ok()?;
130        when.duration_since(SystemTime::now()).ok()
131    };
132    parsed.map(|d| d.min(MAX_RETRY_AFTER))
133}
134
135// --- ureq transport -----------------------------------------------------
136
/// `ureq::Agent`-backed [`UsageTransport`]. One agent is shared across
/// all `fetch_usage` calls so connection pooling / keepalive applies.
pub struct UreqTransport {
    /// Agent reused for every request issued through this transport.
    agent: ureq::Agent,
    /// `User-Agent` header value, computed once at construction and
    /// sent with every request.
    user_agent: String,
}
143
144impl UreqTransport {
145    #[must_use]
146    pub fn new() -> Self {
147        // `http_status_as_error(false)` so 4xx/5xx surface as
148        // `Ok(Response)` with a status code rather than `Err` — we
149        // need to inspect 401 and 429 specifically.
150        let mut builder = ureq::Agent::config_builder().http_status_as_error(false);
151        if let Some(proxy) = resolve_proxy_from_env() {
152            builder = builder.proxy(Some(proxy));
153        }
154        Self {
155            agent: ureq::Agent::new_with_config(builder.build()),
156            user_agent: default_user_agent(),
157        }
158    }
159}
160
161/// Resolve a proxy from `ALL_PROXY` / `HTTPS_PROXY` / `HTTP_PROXY`
162/// / `NO_PROXY` (and lowercase variants), matching ureq's documented
163/// env-var order. ureq's `Proxy::try_from_env` does the parsing,
164/// including the `NO_PROXY` exclusion list. A malformed proxy URL
165/// warns to stderr (variable name only — the value can carry
166/// credentials like `http://user:pass@host`) and falls through to a
167/// direct connection. Without the warn the proxy silently drops and
168/// the user sees `[Network error]` with no clue the env var was the
169/// cause.
170fn resolve_proxy_from_env() -> Option<ureq::Proxy> {
171    match ureq::Proxy::try_from_env() {
172        Some(proxy) => Some(proxy),
173        None => {
174            // `try_from_env` returns `None` both when no env var is
175            // set AND when a set var fails to parse. Distinguish the
176            // two by re-reading the env vars in ureq's own iteration
177            // order; a set-but-unparsed value is the case worth
178            // warning about.
179            for var in [
180                "ALL_PROXY",
181                "all_proxy",
182                "HTTPS_PROXY",
183                "https_proxy",
184                "HTTP_PROXY",
185                "http_proxy",
186            ] {
187                if let Ok(val) = std::env::var(var) {
188                    if !val.is_empty() {
189                        // Log the variable NAME only. Proxy URLs
190                        // routinely embed credentials (`user:pass@`);
191                        // echoing the value to stderr / CI logs
192                        // would leak them precisely in the
193                        // misconfiguration scenario this warn is
194                        // trying to help diagnose.
195                        crate::lsm_warn!(
196                            "{var}: failed to parse as proxy URL; falling back to direct connection"
197                        );
198                        return None;
199                    }
200                }
201            }
202            None
203        }
204    }
205}
206
207/// Build the `User-Agent` header value from the compile-time package
208/// version. Exposed for regression tests that pin the format contract.
209#[must_use]
210pub fn default_user_agent() -> String {
211    format!("linesmith/{}", env!("CARGO_PKG_VERSION"))
212}
213
214impl Default for UreqTransport {
215    fn default() -> Self {
216        Self::new()
217    }
218}
219
220impl UsageTransport for UreqTransport {
221    fn get(&self, url: &str, token: &str, timeout: Duration) -> io::Result<HttpResponse> {
222        let auth = format!("Bearer {token}");
223        let mut response = self
224            .agent
225            .get(url)
226            .config()
227            .timeout_global(Some(timeout))
228            .build()
229            .header("Authorization", &auth)
230            .header(ANTHROPIC_BETA_HEADER, ANTHROPIC_BETA_VALUE)
231            .header("User-Agent", &self.user_agent)
232            .call()
233            .map_err(ureq_err_to_io)?;
234
235        let status: u16 = response.status().as_u16();
236        let retry_after = response
237            .headers()
238            .get("retry-after")
239            .and_then(|v| v.to_str().ok())
240            .map(String::from);
241
242        // Cap the body read so a misbehaving / MITM'd server can't
243        // OOM us. Oversized responses surface as ParseError in
244        // `interpret_status` (the serde parse will fail on the
245        // truncated prefix).
246        let body = response
247            .body_mut()
248            .with_config()
249            .limit(MAX_RESPONSE_BYTES)
250            .read_to_vec()
251            .map_err(ureq_err_to_io)?;
252
253        Ok(HttpResponse {
254            status,
255            body,
256            retry_after,
257        })
258    }
259}
260
261/// Collapse `ureq::Error` into `io::Error` for the [`UsageTransport`]
262/// boundary. Timeouts keep [`io::ErrorKind::TimedOut`] so
263/// [`fetch_usage`] can differentiate; everything else is treated as
264/// a generic network failure.
265fn ureq_err_to_io(e: ureq::Error) -> io::Error {
266    match e {
267        ureq::Error::Timeout(_) => io::Error::new(io::ErrorKind::TimedOut, "request timed out"),
268        ureq::Error::Io(inner) => inner,
269        other => io::Error::other(other.to_string()),
270    }
271}
272
273// --- Tests --------------------------------------------------------------
274
#[cfg(test)]
mod tests {
    // Unit tests for the fetcher. Everything except the
    // `UreqTransport` construction smoke test runs against
    // `FakeTransport`, so no network I/O happens here.
    use super::*;
    use std::cell::RefCell;

    /// Credentials carrying a fixed placeholder token, for tests that
    /// don't care about the token value.
    fn creds() -> Credentials {
        Credentials::for_testing("test-token-xyz")
    }

    /// Single pre-programmed response returned on every `get()`. Tests
    /// verify the call arguments (URL, token, timeout) separately via
    /// `captured`.
    struct FakeTransport {
        response: io::Result<HttpResponse>,
        captured: RefCell<Option<FakeCall>>,
    }

    /// Arguments recorded from the most recent `get()` call on a
    /// [`FakeTransport`].
    #[derive(Debug, Clone)]
    struct FakeCall {
        url: String,
        token: String,
        timeout: Duration,
    }

    impl UsageTransport for FakeTransport {
        fn get(&self, url: &str, token: &str, timeout: Duration) -> io::Result<HttpResponse> {
            *self.captured.borrow_mut() = Some(FakeCall {
                url: url.to_string(),
                token: token.to_string(),
                timeout,
            });
            // `self.response` is only borrowed here, so an equivalent
            // outcome is rebuilt for each call. Errors are recreated
            // from their kind and message.
            match &self.response {
                Ok(r) => Ok(HttpResponse {
                    status: r.status,
                    body: r.body.clone(),
                    retry_after: r.retry_after.clone(),
                }),
                Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
            }
        }
    }

    /// Fake that answers every request with the given status, body and
    /// optional `Retry-After` header.
    fn ok_transport(status: u16, body: &str, retry_after: Option<&str>) -> FakeTransport {
        FakeTransport {
            response: Ok(HttpResponse {
                status,
                body: body.as_bytes().to_vec(),
                retry_after: retry_after.map(String::from),
            }),
            captured: RefCell::new(None),
        }
    }

    /// Fake that fails every request with an I/O error of `kind`.
    fn err_transport(kind: io::ErrorKind) -> FakeTransport {
        FakeTransport {
            response: Err(io::Error::new(kind, "fake")),
            captured: RefCell::new(None),
        }
    }

    // Successful-response fixture with `five_hour` and `seven_day`
    // windows; the happy-path test reads `five_hour.utilization` back.
    const SAMPLE_OK_BODY: &str = r#"{
        "five_hour": { "utilization": 22.0, "resets_at": "2026-04-19T05:00:00Z" },
        "seven_day": { "utilization": 33.0, "resets_at": "2026-04-23T19:00:00Z" }
    }"#;

    #[test]
    fn fetch_happy_path_parses_live_shape() {
        let transport = ok_transport(200, SAMPLE_OK_BODY, None);
        let resp = fetch_usage(
            &transport,
            "https://api.anthropic.com",
            &creds(),
            DEFAULT_TIMEOUT,
        )
        .expect("ok");
        assert_eq!(resp.five_hour.unwrap().utilization.value(), 22.0);
    }

    #[test]
    fn fetch_builds_url_without_double_slash() {
        let transport = ok_transport(200, SAMPLE_OK_BODY, None);
        let _ = fetch_usage(
            &transport,
            "https://api.anthropic.com/",
            &creds(),
            DEFAULT_TIMEOUT,
        );
        let captured = transport.captured.borrow().clone().unwrap();
        assert_eq!(captured.url, "https://api.anthropic.com/api/oauth/usage");
    }

    #[test]
    fn fetch_passes_token_through_to_transport() {
        let transport = ok_transport(200, SAMPLE_OK_BODY, None);
        let _ = fetch_usage(
            &transport,
            "https://example.test",
            &Credentials::for_testing("unique-token-42"),
            DEFAULT_TIMEOUT,
        );
        let captured = transport.captured.borrow().clone().unwrap();
        assert_eq!(captured.token, "unique-token-42");
    }

    #[test]
    fn fetch_passes_timeout_through_to_transport() {
        let transport = ok_transport(200, SAMPLE_OK_BODY, None);
        let custom_timeout = Duration::from_millis(750);
        let _ = fetch_usage(&transport, "https://x", &creds(), custom_timeout);
        let captured = transport.captured.borrow().clone().unwrap();
        assert_eq!(captured.timeout, custom_timeout);
    }

    #[test]
    fn fetch_maps_401_to_unauthorized() {
        let transport = ok_transport(401, "", None);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::Unauthorized));
    }

    #[test]
    fn fetch_maps_429_with_integer_retry_after() {
        let transport = ok_transport(429, "", Some("120"));
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        match err {
            UsageError::RateLimited {
                retry_after: Some(d),
            } => assert_eq!(d.as_secs(), 120),
            other => panic!("expected RateLimited(Some(120s)), got {other:?}"),
        }
    }

    #[test]
    fn fetch_maps_429_with_http_date_retry_after() {
        // `httpdate` validates the day-name-vs-date consistency, so
        // build the header value via the crate's own formatter rather
        // than hand-rolling a hopefully-correct fixed date.
        let future = SystemTime::now() + Duration::from_secs(3600);
        let header_value = httpdate::fmt_http_date(future);
        let transport = ok_transport(429, "", Some(&header_value));
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        let UsageError::RateLimited {
            retry_after: Some(d),
        } = err
        else {
            panic!("expected RateLimited with Some duration, got {err:?}");
        };
        assert!(d.as_secs() > 0, "expected positive duration, got {d:?}");
    }

    #[test]
    fn fetch_maps_429_without_retry_after_to_default_backoff() {
        let transport = ok_transport(429, "", None);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        match err {
            UsageError::RateLimited {
                retry_after: Some(d),
            } => assert_eq!(d, DEFAULT_RATE_LIMIT_BACKOFF),
            other => panic!("expected RateLimited with default backoff, got {other:?}"),
        }
    }

    #[test]
    fn fetch_maps_5xx_to_network_error() {
        let transport = ok_transport(503, "", None);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::NetworkError));
    }

    #[test]
    fn fetch_maps_malformed_json_to_parse_error() {
        let transport = ok_transport(200, "{ not valid json ", None);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::ParseError));
    }

    #[test]
    fn fetch_maps_timeout_to_usage_timeout() {
        let transport = err_transport(io::ErrorKind::TimedOut);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::Timeout));
    }

    #[test]
    fn fetch_maps_connection_refused_to_network_error() {
        let transport = err_transport(io::ErrorKind::ConnectionRefused);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::NetworkError));
    }

    #[test]
    fn fetch_401_display_does_not_leak_token() {
        // Regression guard for credentials.md §Non-functional:
        // token bytes must not appear in error output for a request
        // that carried them, even when the server rejects auth.
        let transport = ok_transport(401, "", None);
        let err = fetch_usage(
            &transport,
            "https://x",
            &Credentials::for_testing("super-secret-token-abc123"),
            DEFAULT_TIMEOUT,
        )
        .unwrap_err();
        let display = format!("{err}");
        let debug = format!("{err:?}");
        assert!(
            !display.contains("super-secret-token-abc123"),
            "display leaked: {display}"
        );
        assert!(
            !debug.contains("super-secret-token-abc123"),
            "debug leaked: {debug}"
        );
    }

    #[test]
    fn parse_retry_after_integer_seconds() {
        // The second case pins that surrounding whitespace is ignored.
        assert_eq!(parse_retry_after("60"), Some(Duration::from_secs(60)));
        assert_eq!(parse_retry_after("  60  "), Some(Duration::from_secs(60)));
    }

    #[test]
    fn parse_retry_after_zero() {
        assert_eq!(parse_retry_after("0"), Some(Duration::from_secs(0)));
    }

    #[test]
    fn parse_retry_after_http_date_future() {
        // Roundtrip via `httpdate::fmt_http_date` so we feed parse the
        // same format the library emits — eliminates day-name drift.
        let future = SystemTime::now() + Duration::from_secs(3600);
        let raw = httpdate::fmt_http_date(future);
        let parsed = parse_retry_after(&raw);
        assert!(parsed.is_some_and(|d| d.as_secs() > 0));
    }

    #[test]
    fn parse_retry_after_http_date_past_returns_none() {
        // A past date can't yield a positive duration-since-now.
        assert_eq!(parse_retry_after("Thu, 01 Jan 1970 00:00:00 GMT"), None);
    }

    #[test]
    fn parse_retry_after_garbage_returns_none() {
        assert_eq!(parse_retry_after("not a date"), None);
        assert_eq!(parse_retry_after(""), None);
        assert_eq!(parse_retry_after("-1"), None);
    }

    #[test]
    fn parse_retry_after_caps_pathological_values() {
        // u64::MAX seconds would otherwise produce a Duration the
        // orchestrator can't meaningfully compare to a cache TTL.
        let parsed = parse_retry_after(&u64::MAX.to_string()).unwrap();
        assert_eq!(parsed, MAX_RETRY_AFTER);
    }

    #[test]
    fn ureq_transport_construction_pins_user_agent_and_proxy_path() {
        // `UreqTransport::new` is infallible by contract: a future
        // refactor that wires the proxy via a fallible API without
        // handling the error would regress here. The user_agent
        // assertion doubles as a constructor smoke for the
        // unrelated field. ureq's `Proxy::try_from_env` is the
        // canonical RFC implementation; the warn-on-unparseable-
        // env-var case our wrapper adds needs a warn-sink hook to
        // assert directly.
        let transport = UreqTransport::new();
        assert_eq!(transport.user_agent, default_user_agent());
    }

    #[test]
    fn default_user_agent_includes_version_and_crate_name() {
        // Regression guard for the ADR-0011 §Endpoint contract
        // User-Agent requirement. Pins the format so a future
        // refactor can't silently drop the version suffix.
        let ua = default_user_agent();
        assert!(ua.starts_with("linesmith/"), "ua = {ua}");
        assert!(
            ua.ends_with(env!("CARGO_PKG_VERSION")),
            "ua = {ua}; version = {}",
            env!("CARGO_PKG_VERSION"),
        );
    }

    #[test]
    fn fetch_204_empty_body_surfaces_parse_error() {
        // 2xx non-200 responses with empty bodies collapse to
        // ParseError via `serde_json::from_slice(b"")`. Lock the
        // behavior down so a future change to 204 handling is
        // intentional.
        let transport = ok_transport(204, "", None);
        let err = fetch_usage(&transport, "https://x", &creds(), DEFAULT_TIMEOUT).unwrap_err();
        assert!(matches!(err, UsageError::ParseError));
    }
}
570}