Skip to main content

yfinance/
client.rs

1//! HTTP client with cookie + crumb session management.
2//!
3//! Yahoo's data endpoints require a `crumb` token tied to a session cookie that
4//! must be acquired in advance. [`YfClient`] handles the full handshake (basic
5//! cookie strategy with CSRF consent fallback), retries transient failures, and
6//! caches the crumb for the configured TTL.
7
8use std::collections::HashMap;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use parking_lot::RwLock;
13use rand::seq::SliceRandom;
14use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, ACCEPT_LANGUAGE, REFERER, USER_AGENT};
15use reqwest::{Client, Response, StatusCode};
16use serde::de::DeserializeOwned;
17use tokio::sync::Mutex;
18use tokio::time::sleep;
19
20use crate::error::{Error, Result};
21
22/// Default User-Agent strings rotated per session.
23///
24/// Modern desktop Chrome on Windows/macOS — Yahoo blocks obvious bots, but is
25/// permissive of typical browsers.
26const USER_AGENTS: &[&str] = &[
27    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
28    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
29    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
30    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
31];
32
33/// Yahoo Finance host for primary data endpoints.
34pub(crate) const QUERY1_HOST: &str = "https://query1.finance.yahoo.com";
35/// Alternate host. Yahoo treats query1/query2 as interchangeable; some endpoints
36/// favor query2 historically.
37pub(crate) const QUERY2_HOST: &str = "https://query2.finance.yahoo.com";
38/// Default base URL for the ISIN suggest endpoint hosted by Business Insider.
39/// It's the same source the Python yfinance library queries.
40pub(crate) const DEFAULT_ISIN_BASE: &str =
41    "https://markets.businessinsider.com/ajax/SearchController_Suggest";
42/// Default base URL for Yahoo Finance's news XHR endpoint family.
43pub(crate) const DEFAULT_NEWS_BASE: &str = "https://finance.yahoo.com";
44/// Default base URL for the human-facing quote page (used by the profile
45/// scrape fallback).
46pub(crate) const DEFAULT_QUOTE_PAGE_BASE: &str = "https://finance.yahoo.com/quote";
47
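/// How long a fetched crumb is reused before the handshake is repeated: 30 minutes.
/// Per yfinance's behavior crumbs typically remain valid for ~1 hour, so the cached
/// value is refreshed well before it would be expected to expire.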
49const CRUMB_TTL: Duration = Duration::from_secs(60 * 30);
50
/// Caller hint for which Yahoo API surface to use when an endpoint can be
/// served by more than one (e.g. legacy `v8/finance/chart` vs the newer
/// `v7/finance/quote` for snapshot data).
///
/// Defaults to [`ApiPreference::Auto`], which leaves the choice to the crate
/// on a per-endpoint basis. The value is currently only plumbed through the
/// client builder for parity with the gramistella upstream; individual
/// endpoints don't yet branch on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ApiPreference {
    /// Let the crate choose the best surface for each endpoint (default).
    #[default]
    Auto,
    /// Favor the legacy `v8/finance/chart` family.
    Chart,
    /// Favor the newer `v7/finance/quote` family.
    Quote,
}
68
/// Controls how a single request interacts with the in-memory response cache.
///
/// The cache only exists when [`YfClientBuilder::cache_ttl`] is non-zero; with
/// a zero TTL every variant behaves the same and the request always goes to
/// the network.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CacheMode {
    /// Serve a fresh cached entry when there is one; otherwise fetch and store.
    #[default]
    Use,
    /// Ignore any cached entry, but store the freshly fetched response.
    Refresh,
    /// Neither read from nor write to the cache for this request.
    Bypass,
}
83
/// Retry policy that can override the client-wide settings for one request.
/// Where an override is optional, `None` means "inherit the configuration set
/// via the builder".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RetryConfig {
    /// Upper bound on retries after a transient failure (5xx, 429, transport errors).
    pub max_retries: u32,
    /// Delay before the first retry; each subsequent retry waits twice as long.
    pub initial_backoff: Duration,
}

impl RetryConfig {
    /// Create a policy from a retry budget and the initial backoff delay.
    pub fn new(max_retries: u32, initial_backoff: Duration) -> Self {
        RetryConfig {
            max_retries,
            initial_backoff,
        }
    }
}
103
104/// Builder for [`YfClient`].
105#[derive(Debug)]
106pub struct YfClientBuilder {
107    user_agent: Option<String>,
108    timeout: Duration,
109    base_query_host: String,
110    crumb_url: Option<String>,
111    cookie_prime_url: Option<String>,
112    isin_base_url: Option<String>,
113    news_base_url: Option<String>,
114    quote_page_base_url: Option<String>,
115    session_cookie: Option<String>,
116    session_crumb: Option<String>,
117    max_retries: u32,
118    retry_backoff: Duration,
119    cache_ttl: Duration,
120    api_preference: ApiPreference,
121    underlying: Option<Client>,
122}
123
124impl Default for YfClientBuilder {
125    fn default() -> Self {
126        Self {
127            user_agent: None,
128            timeout: Duration::from_secs(30),
129            // Match Python yfinance and the gramistella upstream: query1 is the
130            // primary host, query2 the fallback. Some endpoints (notably the
131            // crumb fetch) are stricter on query2.
132            base_query_host: QUERY1_HOST.to_string(),
133            crumb_url: None,
134            cookie_prime_url: None,
135            isin_base_url: None,
136            news_base_url: None,
137            quote_page_base_url: None,
138            session_cookie: None,
139            session_crumb: None,
140            max_retries: 3,
141            retry_backoff: Duration::from_millis(500),
142            cache_ttl: Duration::ZERO,
143            api_preference: ApiPreference::default(),
144            underlying: None,
145        }
146    }
147}
148
149impl YfClientBuilder {
150    /// Override the User-Agent header. By default a desktop browser UA is picked at random.
151    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
152        self.user_agent = Some(ua.into());
153        self
154    }
155
156    /// Per-request timeout. Default: 30 seconds.
157    pub fn timeout(mut self, dur: Duration) -> Self {
158        self.timeout = dur;
159        self
160    }
161
162    /// Maximum retries on transient errors (5xx, timeouts, 429 with backoff).
163    pub fn max_retries(mut self, n: u32) -> Self {
164        self.max_retries = n;
165        self
166    }
167
168    /// Initial backoff between retries (doubled each attempt).
169    pub fn retry_backoff(mut self, dur: Duration) -> Self {
170        self.retry_backoff = dur;
171        self
172    }
173
174    /// Apply a [`RetryConfig`] (sugar for `max_retries` + `retry_backoff`).
175    pub fn retry(mut self, cfg: RetryConfig) -> Self {
176        self.max_retries = cfg.max_retries;
177        self.retry_backoff = cfg.initial_backoff;
178        self
179    }
180
181    /// Set the in-memory response cache TTL. `Duration::ZERO` (default)
182    /// disables caching entirely. Cache keys are URL+query so the same
183    /// request from different builders is shared.
184    pub fn cache_ttl(mut self, ttl: Duration) -> Self {
185        self.cache_ttl = ttl;
186        self
187    }
188
189    /// Set the [`ApiPreference`] hint for endpoints that have more than one
190    /// implementation. Currently advisory.
191    pub fn api_preference(mut self, pref: ApiPreference) -> Self {
192        self.api_preference = pref;
193        self
194    }
195
196    /// Override the base host used for data requests. Useful for tests against
197    /// a mock server.
198    pub fn base_host(mut self, host: impl Into<String>) -> Self {
199        self.base_query_host = host.into();
200        self
201    }
202
203    /// Override the crumb-fetch URL (full URL, e.g.
204    /// `http://127.0.0.1:1234/v1/test/getcrumb`). When set, the default
205    /// query1 → query2 fallback is bypassed. Used by tests that mock Yahoo.
206    pub fn crumb_url(mut self, url: impl Into<String>) -> Self {
207        self.crumb_url = Some(url.into());
208        self
209    }
210
211    /// Override the cookie-priming URL (default: `https://fc.yahoo.com/consent`).
212    /// Pass an empty string to skip priming entirely — useful in offline tests.
213    pub fn cookie_prime_url(mut self, url: impl Into<String>) -> Self {
214        self.cookie_prime_url = Some(url.into());
215        self
216    }
217
218    /// Inject a raw `Cookie:` header value (semicolon-separated `key=value`
219    /// pairs). When set, it's sent on every request to Yahoo hosts in
220    /// addition to the `reqwest` cookie store. Combine with
221    /// `cookie_prime_url("")` to skip the consent fetch when you already
222    /// have a browser session.
223    ///
224    /// ```no_run
225    /// # use yfinance::YfClient;
226    /// // Cookies copied from your browser's DevTools Network tab.
227    /// let client = YfClient::builder()
228    ///     .cookie_prime_url("")
229    ///     .session_cookie("A1=d=...; A3=d=...; gpp=DBAA")
230    ///     .build()?;
231    /// # Ok::<(), yfinance::Error>(())
232    /// ```
233    pub fn session_cookie(mut self, cookie_header: impl Into<String>) -> Self {
234        self.session_cookie = Some(cookie_header.into());
235        self
236    }
237
238    /// Inject a pre-fetched crumb token. When set, the crumb-fetch handshake
239    /// (`fc.yahoo.com/consent` + `query1/v1/test/getcrumb`) is skipped on
240    /// authenticated requests — the supplied crumb is used directly.
241    ///
242    /// Pair with [`Self::session_cookie`] to bring an entire browser session
243    /// — paste your `crumb` token from a `query1/v1/test/getcrumb` request
244    /// in DevTools.
245    pub fn session_crumb(mut self, crumb: impl Into<String>) -> Self {
246        self.session_crumb = Some(crumb.into());
247        self
248    }
249
250    /// Override the base URL of the ISIN suggest service (default:
251    /// `https://markets.businessinsider.com/ajax/SearchController_Suggest`).
252    pub fn isin_base_url(mut self, url: impl Into<String>) -> Self {
253        self.isin_base_url = Some(url.into());
254        self
255    }
256
257    /// Override the base URL of Yahoo's news XHR endpoint family
258    /// (default: `https://finance.yahoo.com`).
259    pub fn news_base_url(mut self, url: impl Into<String>) -> Self {
260        self.news_base_url = Some(url.into());
261        self
262    }
263
264    /// Override the base URL of the human-facing quote page used by the
265    /// profile scrape fallback (default: `https://finance.yahoo.com/quote`).
266    pub fn quote_page_base_url(mut self, url: impl Into<String>) -> Self {
267        self.quote_page_base_url = Some(url.into());
268        self
269    }
270
271    /// Inject a pre-configured `reqwest::Client` (e.g. with custom proxy/TLS).
272    ///
273    /// The client must have a cookie store enabled.
274    pub fn with_client(mut self, client: Client) -> Self {
275        self.underlying = Some(client);
276        self
277    }
278
279    /// Build the client.
280    pub fn build(self) -> Result<YfClient> {
281        let user_agent = self.user_agent.unwrap_or_else(|| {
282            USER_AGENTS
283                .choose(&mut rand::thread_rng())
284                .copied()
285                .unwrap_or(USER_AGENTS[0])
286                .to_string()
287        });
288
289        let inner = if let Some(c) = self.underlying {
290            c
291        } else {
292            Client::builder()
293                .cookie_store(true)
294                .gzip(true)
295                .timeout(self.timeout)
296                .user_agent(&user_agent)
297                .build()?
298        };
299
300        Ok(YfClient {
301            inner: Arc::new(Inner {
302                http: inner,
303                user_agent,
304                base_host: self.base_query_host,
305                crumb_url: self.crumb_url,
306                cookie_prime_url: self.cookie_prime_url,
307                isin_base_url: self
308                    .isin_base_url
309                    .unwrap_or_else(|| DEFAULT_ISIN_BASE.to_string()),
310                news_base_url: self
311                    .news_base_url
312                    .unwrap_or_else(|| DEFAULT_NEWS_BASE.to_string()),
313                quote_page_base_url: self
314                    .quote_page_base_url
315                    .unwrap_or_else(|| DEFAULT_QUOTE_PAGE_BASE.to_string()),
316                session_cookie: self.session_cookie,
317                session_crumb: self.session_crumb,
318                max_retries: self.max_retries,
319                retry_backoff: self.retry_backoff,
320                cache_ttl: self.cache_ttl,
321                api_preference: self.api_preference,
322                crumb_state: Mutex::new(()),
323                crumb: RwLock::new(None),
324                cache: RwLock::new(HashMap::new()),
325            }),
326        })
327    }
328}
329
330/// Asynchronous HTTP client that authenticates against Yahoo Finance and
331/// performs JSON requests with retry/backoff.
332///
333/// Cheap to clone — internally an `Arc`.
334#[derive(Clone)]
335pub struct YfClient {
336    inner: Arc<Inner>,
337}
338
339impl std::fmt::Debug for YfClient {
340    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
341        f.debug_struct("YfClient")
342            .field("base_host", &self.inner.base_host)
343            .field("max_retries", &self.inner.max_retries)
344            .field("cache_ttl", &self.inner.cache_ttl)
345            .finish()
346    }
347}
348
349struct Inner {
350    http: Client,
351    user_agent: String,
352    base_host: String,
353    crumb_url: Option<String>,
354    cookie_prime_url: Option<String>,
355    isin_base_url: String,
356    news_base_url: String,
357    quote_page_base_url: String,
358    session_cookie: Option<String>,
359    session_crumb: Option<String>,
360    max_retries: u32,
361    retry_backoff: Duration,
362    cache_ttl: Duration,
363    api_preference: ApiPreference,
364    /// Serializes crumb refreshes so concurrent callers share one handshake.
365    crumb_state: Mutex<()>,
366    crumb: RwLock<Option<CrumbState>>,
367    cache: RwLock<HashMap<String, CacheEntry>>,
368}
369
370#[derive(Clone)]
371struct CacheEntry {
372    body: bytes::Bytes,
373    inserted: Instant,
374}
375
376#[derive(Debug, Clone)]
377struct CrumbState {
378    crumb: String,
379    fetched_at: std::time::Instant,
380}
381
382impl YfClient {
383    /// Build a client with default settings.
384    pub fn new() -> Result<Self> {
385        YfClientBuilder::default().build()
386    }
387
388    /// Returns a fresh builder.
389    pub fn builder() -> YfClientBuilder {
390        YfClientBuilder::default()
391    }
392
393    /// The base data host (`query1.finance.yahoo.com` by default).
394    #[allow(dead_code)]
395    pub(crate) fn base_host(&self) -> &str {
396        &self.inner.base_host
397    }
398
399    /// The configured ISIN suggest endpoint base URL.
400    pub(crate) fn isin_base_url(&self) -> &str {
401        &self.inner.isin_base_url
402    }
403
404    /// The configured base URL for the quote-page scrape (no trailing `/`).
405    pub(crate) fn quote_page_base_url(&self) -> &str {
406        &self.inner.quote_page_base_url
407    }
408
409    /// The configured API preference hint.
410    pub fn api_preference(&self) -> ApiPreference {
411        self.inner.api_preference
412    }
413
414    /// Construct a POST `RequestBuilder` to `path` on the news host with the
415    /// standard browser headers attached.
416    pub(crate) fn news_post(&self, path: &str) -> reqwest::RequestBuilder {
417        let url = format!("{}{}", self.inner.news_base_url, path);
418        self.inner.http.post(url).headers(self.std_headers())
419    }
420
421    /// Construct a `reqwest::RequestBuilder` for `path` on the data host with
422    /// standard browser headers attached.
423    pub(crate) fn data_get(&self, path: &str) -> reqwest::RequestBuilder {
424        let url = format!("{}{}", self.inner.base_host, path);
425        self.inner.http.get(url).headers(self.std_headers())
426    }
427
428    /// Construct a request against an arbitrary URL with browser headers.
429    pub(crate) fn raw_get(&self, url: &str) -> reqwest::RequestBuilder {
430        self.inner.http.get(url).headers(self.std_headers())
431    }
432
433    /// Send a `RequestBuilder` to completion, optionally recording the body as
434    /// a fixture under `(endpoint, symbol)`. Returns `Ok(None)` for non-success
435    /// statuses where callers want to degrade gracefully (e.g. ISIN lookup),
436    /// otherwise the response body. Used by modules that don't go through the
437    /// JSON helpers — `isin` (text response from a 3rd-party host) and `news`
438    /// (POST with JSON body).
439    pub(crate) async fn send_text_recorded(
440        &self,
441        req: reqwest::RequestBuilder,
442        record_as: Option<(&str, &str)>,
443    ) -> Result<Option<String>> {
444        let resp = req.send().await?;
445        let status = resp.status();
446        let text = resp.text().await.unwrap_or_default();
447        if status.is_success() {
448            Self::maybe_record(record_as, text.as_bytes());
449            return Ok(Some(text));
450        }
451        if status == StatusCode::TOO_MANY_REQUESTS {
452            return Err(Error::RateLimited);
453        }
454        if status.is_server_error() || status.as_u16() == 404 {
455            return Err(Error::Status {
456                status: status.as_u16(),
457                message: text.chars().take(200).collect(),
458            });
459        }
460        Ok(None)
461    }
462
463    /// Percent-encode characters that aren't safe in a Yahoo URL path segment.
464    /// Yahoo accepts unencoded `. - = ^ _` in symbols (`BRK-B`, `^GSPC`, …).
465    pub(crate) fn path_encode(s: &str) -> String {
466        s.chars()
467            .map(|c| match c {
468                'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '=' | '^' | '_' => c.to_string(),
469                _ => format!("%{:02X}", c as u32),
470            })
471            .collect()
472    }
473
474    fn std_headers(&self) -> HeaderMap {
475        let mut h = HeaderMap::new();
476        h.insert(
477            USER_AGENT,
478            HeaderValue::from_str(&self.inner.user_agent).unwrap(),
479        );
480        h.insert(
481            ACCEPT,
482            HeaderValue::from_static("application/json, text/plain, */*"),
483        );
484        h.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.9"));
485        h.insert(
486            REFERER,
487            HeaderValue::from_static("https://finance.yahoo.com/"),
488        );
489        if let Some(cookie) = self.inner.session_cookie.as_deref() {
490            if let Ok(v) = HeaderValue::from_str(cookie) {
491                h.insert(reqwest::header::COOKIE, v);
492            }
493        }
494        h
495    }
496
497    /// Send a GET to `path` (relative to the data host) returning a parsed JSON body.
498    ///
499    /// Adds the current crumb to the query string when one is available, and
500    /// applies retry/backoff to transient failures.
501    ///
502    /// `record_as` labels the response for fixture recording: when the crate is
503    /// built with `test-mode` and `YF_RECORD=1` is set, the raw body is written
504    /// to `tests/fixtures/{endpoint}_{symbol}.json`. Pass `None` to skip.
505    pub(crate) async fn get_json<T: DeserializeOwned>(
506        &self,
507        path: &str,
508        query: &[(&str, String)],
509        record_as: Option<(&str, &str)>,
510    ) -> Result<T> {
511        self.get_json_cached(path, query, record_as, CacheMode::Use)
512            .await
513    }
514
515    /// Like [`Self::get_json`] but consults the response cache per the given
516    /// [`CacheMode`]. Used by builders that expose per-request overrides.
517    pub(crate) async fn get_json_cached<T: DeserializeOwned>(
518        &self,
519        path: &str,
520        query: &[(&str, String)],
521        record_as: Option<(&str, &str)>,
522        cache_mode: CacheMode,
523    ) -> Result<T> {
524        let body = self
525            .get_bytes(path, query, /*needs_crumb=*/ false, cache_mode)
526            .await?;
527        Self::maybe_record(record_as, &body);
528        serde_json::from_slice(&body).map_err(Error::from)
529    }
530
531    /// Variant of [`Self::get_json`] that ensures a crumb is appended to the query.
532    pub(crate) async fn get_json_crumb<T: DeserializeOwned>(
533        &self,
534        path: &str,
535        query: &[(&str, String)],
536        record_as: Option<(&str, &str)>,
537    ) -> Result<T> {
538        let body = self
539            .get_bytes(path, query, /*needs_crumb=*/ true, CacheMode::Use)
540            .await?;
541        Self::maybe_record(record_as, &body);
542        serde_json::from_slice(&body).map_err(Error::from)
543    }
544
545    /// Fetch one or more `quoteSummary` modules for `symbol`. Returns the first
546    /// `result[0]` Map (or `None` when Yahoo returned an empty result), with
547    /// `quoteSummary.error` mapped to [`Error::Yahoo`].
548    pub(crate) async fn fetch_quote_summary(
549        &self,
550        symbol: &str,
551        modules: &str,
552        fixture_label: &str,
553    ) -> Result<Option<serde_json::Map<String, serde_json::Value>>> {
554        #[derive(serde::Deserialize)]
555        struct Envelope {
556            #[serde(rename = "quoteSummary")]
557            quote_summary: Inner,
558        }
559        #[derive(serde::Deserialize)]
560        struct Inner {
561            #[serde(default)]
562            result: Vec<serde_json::Map<String, serde_json::Value>>,
563            #[serde(default)]
564            error: Option<serde_json::Value>,
565        }
566
567        let path = format!("/v10/finance/quoteSummary/{}", Self::path_encode(symbol));
568        let q = vec![
569            ("modules", modules.to_string()),
570            ("formatted", "false".into()),
571            ("corsDomain", "finance.yahoo.com".into()),
572        ];
573        let env: Envelope = self
574            .get_json_crumb(&path, &q, Some((fixture_label, symbol)))
575            .await?;
576        if let Some(err) = env.quote_summary.error {
577            return Err(Error::Yahoo {
578                symbol: symbol.to_string(),
579                code: format!("{fixture_label}_error"),
580                description: err.to_string(),
581            });
582        }
583        Ok(env.quote_summary.result.into_iter().next())
584    }
585
586    fn maybe_record(label: Option<(&str, &str)>, body: &[u8]) {
587        #[cfg(feature = "test-mode")]
588        {
589            if let Some((endpoint, symbol)) = label {
590                if crate::test_fixtures::is_recording() {
591                    if let Ok(text) = std::str::from_utf8(body) {
592                        if let Err(e) =
593                            crate::test_fixtures::record_fixture(endpoint, symbol, "json", text)
594                        {
595                            eprintln!("YF_RECORD: failed to write fixture for {symbol}: {e}");
596                        }
597                    }
598                }
599            }
600        }
601        #[cfg(not(feature = "test-mode"))]
602        {
603            let _ = (label, body);
604        }
605    }
606
607    async fn get_bytes(
608        &self,
609        path: &str,
610        query: &[(&str, String)],
611        needs_crumb: bool,
612        cache_mode: CacheMode,
613    ) -> Result<bytes::Bytes> {
614        let mut attempt: u32 = 0;
615        let mut backoff = self.inner.retry_backoff;
616
617        let mut q: Vec<(&str, String)> = query.iter().map(|(k, v)| (*k, v.clone())).collect();
618        if needs_crumb {
619            let crumb = self.ensure_crumb().await?;
620            q.push(("crumb", crumb));
621        }
622
623        let cache_active = self.inner.cache_ttl > Duration::ZERO && cache_mode != CacheMode::Bypass;
624        let cache_key =
625            cache_active.then(|| format!("{}{}?{}", self.inner.base_host, path, encode_query(&q)));
626        if let Some(key) = &cache_key {
627            if cache_mode == CacheMode::Use {
628                if let Some(body) = self.cache_lookup(key) {
629                    return Ok(body);
630                }
631            }
632        }
633
634        loop {
635            let req = self.data_get(path).query(&q);
636            let res = req.send().await;
637            match res {
638                Ok(r) => match self.handle_response(r).await {
639                    Ok(body) => {
640                        if let Some(key) = &cache_key {
641                            self.cache_store(key.clone(), body.clone());
642                        }
643                        return Ok(body);
644                    }
645                    Err(e) if attempt < self.inner.max_retries && is_retryable(&e) => {
646                        log::debug!("retry {}: {}", attempt + 1, e);
647                        sleep(backoff).await;
648                        attempt += 1;
649                        backoff = backoff.saturating_mul(2);
650                    }
651                    Err(e) => return Err(e),
652                },
653                Err(e) if attempt < self.inner.max_retries => {
654                    log::debug!("retry {} (transport): {}", attempt + 1, e);
655                    sleep(backoff).await;
656                    attempt += 1;
657                    backoff = backoff.saturating_mul(2);
658                }
659                Err(e) => return Err(e.into()),
660            }
661        }
662    }
663
664    async fn handle_response(&self, res: Response) -> Result<bytes::Bytes> {
665        let status = res.status();
666        if status.is_success() {
667            return Ok(res.bytes().await?);
668        }
669        if status == StatusCode::TOO_MANY_REQUESTS {
670            return Err(Error::RateLimited);
671        }
672        let body = res.text().await.unwrap_or_default();
673        let snippet = body.chars().take(200).collect::<String>();
674        Err(Error::Status {
675            status: status.as_u16(),
676            message: snippet,
677        })
678    }
679
680    fn cache_lookup(&self, key: &str) -> Option<bytes::Bytes> {
681        let entry = self.inner.cache.read().get(key).cloned()?;
682        if entry.inserted.elapsed() < self.inner.cache_ttl {
683            Some(entry.body)
684        } else {
685            None
686        }
687    }
688
689    fn cache_store(&self, key: String, body: bytes::Bytes) {
690        self.inner.cache.write().insert(
691            key,
692            CacheEntry {
693                body,
694                inserted: Instant::now(),
695            },
696        );
697    }
698
699    /// Returns a valid crumb, fetching one if missing or stale.
700    async fn ensure_crumb(&self) -> Result<String> {
701        if let Some(injected) = self.inner.session_crumb.clone() {
702            return Ok(injected);
703        }
704        if let Some(state) = self.inner.crumb.read().clone() {
705            if state.fetched_at.elapsed() < CRUMB_TTL {
706                return Ok(state.crumb);
707            }
708        }
709
710        // Serialize: we hold the mutex across the fetch so concurrent callers
711        // wait and reuse the result.
712        let _guard = self.inner.crumb_state.lock().await;
713        // Re-check after acquiring the lock.
714        if let Some(state) = self.inner.crumb.read().clone() {
715            if state.fetched_at.elapsed() < CRUMB_TTL {
716                return Ok(state.crumb);
717            }
718        }
719
720        let crumb = self.fetch_crumb().await?;
721        *self.inner.crumb.write() = Some(CrumbState {
722            crumb: crumb.clone(),
723            fetched_at: std::time::Instant::now(),
724        });
725        Ok(crumb)
726    }
727
728    async fn fetch_crumb(&self) -> Result<String> {
729        // Step 1: prime cookies. Default points at fc.yahoo.com; tests
730        // override (or set to empty to skip).
731        // The `/consent` path is what other Yahoo clients (Python yfinance,
732        // gramistella/yfinance-rs) hit — root `/` is more aggressively
733        // throttled and sometimes returns 404 anyway.
734        let prime = self
735            .inner
736            .cookie_prime_url
737            .as_deref()
738            .unwrap_or("https://fc.yahoo.com/consent");
739        if !prime.is_empty() {
740            let _ = self.raw_get(prime).send().await.ok();
741        }
742
743        // Step 2: explicit override wins, otherwise try query1 then query2.
744        let urls: Vec<String> = if let Some(u) = &self.inner.crumb_url {
745            vec![u.clone()]
746        } else {
747            vec![
748                format!("{}/v1/test/getcrumb", QUERY1_HOST),
749                format!("{}/v1/test/getcrumb", QUERY2_HOST),
750            ]
751        };
752        for url in &urls {
753            let res = self.raw_get(url).send().await;
754            let r = match res {
755                Ok(r) => r,
756                Err(_) => continue,
757            };
758            if !r.status().is_success() {
759                continue;
760            }
761            let text = r.text().await.unwrap_or_default();
762            let crumb = text.trim().trim_matches('"').to_string();
763            if !crumb.is_empty() && !crumb.contains('<') {
764                return Ok(crumb);
765            }
766        }
767
768        Err(Error::Auth(
769            "could not fetch crumb token from either query1 or query2".into(),
770        ))
771    }
772}
773
/// Join query pairs as `k=v` separated by `&`, without any escaping.
/// The result is only used to build response-cache keys.
fn encode_query(q: &[(&str, String)]) -> String {
    let pairs: Vec<String> = q.iter().map(|(k, v)| format!("{k}={v}")).collect();
    pairs.join("&")
}
785
786fn is_retryable(err: &Error) -> bool {
787    match err {
788        Error::Http(e) => {
789            e.is_timeout() || e.is_connect() || matches!(e.status(), Some(s) if s.is_server_error())
790        }
791        Error::Status { status, .. } => *status >= 500,
792        Error::RateLimited => true,
793        _ => false,
794    }
795}
796
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an `Error::Status` with an empty message.
    fn status_error(code: u16) -> Error {
        Error::Status {
            status: code,
            message: String::new(),
        }
    }

    /// A default-configured client builds and points at a Yahoo host.
    #[tokio::test]
    async fn build_default() {
        let client = YfClient::new().expect("build");
        assert!(client.base_host().contains("yahoo"));
    }

    /// Rate limits and 5xx are retryable; a 404 is not.
    #[test]
    fn retryable_classification() {
        assert!(is_retryable(&Error::RateLimited));
        assert!(is_retryable(&status_error(502)));
        assert!(!is_retryable(&status_error(404)));
    }
}
819}