1use std::collections::HashMap;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use parking_lot::RwLock;
13use rand::seq::SliceRandom;
14use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, ACCEPT_LANGUAGE, REFERER, USER_AGENT};
15use reqwest::{Client, Response, StatusCode};
16use serde::de::DeserializeOwned;
17use tokio::sync::Mutex;
18use tokio::time::sleep;
19
20use crate::error::{Error, Result};
21
/// Browser-style `User-Agent` strings. The builder picks one at random when
/// the caller has not supplied a user agent of their own.
const USER_AGENTS: &[&str] = &[
    // Chrome on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    // Chrome on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    // Firefox on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
    // Safari on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
];

/// First query host; this is the builder's default base host and the first
/// host tried when fetching a crumb.
pub(crate) const QUERY1_HOST: &str = "https://query1.finance.yahoo.com";

/// Second query host; tried after [`QUERY1_HOST`] when fetching a crumb.
pub(crate) const QUERY2_HOST: &str = "https://query2.finance.yahoo.com";

/// ISIN lookup base URL used when the builder does not override it.
pub(crate) const DEFAULT_ISIN_BASE: &str =
    "https://markets.businessinsider.com/ajax/SearchController_Suggest";

/// News base URL used when the builder does not override it.
pub(crate) const DEFAULT_NEWS_BASE: &str = "https://finance.yahoo.com";

/// Quote page base URL used when the builder does not override it.
pub(crate) const DEFAULT_QUOTE_PAGE_BASE: &str = "https://finance.yahoo.com/quote";

/// How long a fetched crumb is reused before a new one is requested (30 minutes).
const CRUMB_TTL: Duration = Duration::from_secs(30 * 60);
50
/// Caller-selected preference for which upstream API is used.
///
/// The client stores this value and hands it back through
/// `YfClient::api_preference`; the code that reads it decides what each
/// variant means in practice.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ApiPreference {
    /// The default preference.
    #[default]
    Auto,
    /// NOTE(review): presumably prefers the chart endpoint — confirm against callers.
    Chart,
    /// NOTE(review): presumably prefers the quote endpoint — confirm against callers.
    Quote,
}
68
/// Per-request control over the client's response cache.
///
/// The cache only takes part when the client was built with a non-zero
/// cache TTL.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CacheMode {
    /// Serve a cached body when a fresh one exists; otherwise fetch and store.
    #[default]
    Use,
    /// Skip the cache lookup, but store the freshly fetched body.
    Refresh,
    /// Neither read from nor write to the cache.
    Bypass,
}
83
/// Retry policy that can be applied to a builder in one call via
/// `YfClientBuilder::retry`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RetryConfig {
    /// Maximum number of retries after the first attempt.
    pub max_retries: u32,
    /// Delay before the first retry; the client doubles it after each retry.
    pub initial_backoff: Duration,
}

impl RetryConfig {
    /// Creates a retry policy from a retry count and an initial backoff delay.
    pub fn new(max_retries: u32, initial_backoff: Duration) -> Self {
        RetryConfig {
            initial_backoff,
            max_retries,
        }
    }
}
103
104#[derive(Debug)]
106pub struct YfClientBuilder {
107 user_agent: Option<String>,
108 timeout: Duration,
109 base_query_host: String,
110 crumb_url: Option<String>,
111 cookie_prime_url: Option<String>,
112 isin_base_url: Option<String>,
113 news_base_url: Option<String>,
114 quote_page_base_url: Option<String>,
115 session_cookie: Option<String>,
116 session_crumb: Option<String>,
117 max_retries: u32,
118 retry_backoff: Duration,
119 cache_ttl: Duration,
120 api_preference: ApiPreference,
121 underlying: Option<Client>,
122}
123
124impl Default for YfClientBuilder {
125 fn default() -> Self {
126 Self {
127 user_agent: None,
128 timeout: Duration::from_secs(30),
129 base_query_host: QUERY1_HOST.to_string(),
133 crumb_url: None,
134 cookie_prime_url: None,
135 isin_base_url: None,
136 news_base_url: None,
137 quote_page_base_url: None,
138 session_cookie: None,
139 session_crumb: None,
140 max_retries: 3,
141 retry_backoff: Duration::from_millis(500),
142 cache_ttl: Duration::ZERO,
143 api_preference: ApiPreference::default(),
144 underlying: None,
145 }
146 }
147}
148
149impl YfClientBuilder {
150 pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
152 self.user_agent = Some(ua.into());
153 self
154 }
155
156 pub fn timeout(mut self, dur: Duration) -> Self {
158 self.timeout = dur;
159 self
160 }
161
162 pub fn max_retries(mut self, n: u32) -> Self {
164 self.max_retries = n;
165 self
166 }
167
168 pub fn retry_backoff(mut self, dur: Duration) -> Self {
170 self.retry_backoff = dur;
171 self
172 }
173
174 pub fn retry(mut self, cfg: RetryConfig) -> Self {
176 self.max_retries = cfg.max_retries;
177 self.retry_backoff = cfg.initial_backoff;
178 self
179 }
180
181 pub fn cache_ttl(mut self, ttl: Duration) -> Self {
185 self.cache_ttl = ttl;
186 self
187 }
188
189 pub fn api_preference(mut self, pref: ApiPreference) -> Self {
192 self.api_preference = pref;
193 self
194 }
195
196 pub fn base_host(mut self, host: impl Into<String>) -> Self {
199 self.base_query_host = host.into();
200 self
201 }
202
203 pub fn crumb_url(mut self, url: impl Into<String>) -> Self {
207 self.crumb_url = Some(url.into());
208 self
209 }
210
211 pub fn cookie_prime_url(mut self, url: impl Into<String>) -> Self {
214 self.cookie_prime_url = Some(url.into());
215 self
216 }
217
218 pub fn session_cookie(mut self, cookie_header: impl Into<String>) -> Self {
234 self.session_cookie = Some(cookie_header.into());
235 self
236 }
237
238 pub fn session_crumb(mut self, crumb: impl Into<String>) -> Self {
246 self.session_crumb = Some(crumb.into());
247 self
248 }
249
250 pub fn isin_base_url(mut self, url: impl Into<String>) -> Self {
253 self.isin_base_url = Some(url.into());
254 self
255 }
256
257 pub fn news_base_url(mut self, url: impl Into<String>) -> Self {
260 self.news_base_url = Some(url.into());
261 self
262 }
263
264 pub fn quote_page_base_url(mut self, url: impl Into<String>) -> Self {
267 self.quote_page_base_url = Some(url.into());
268 self
269 }
270
271 pub fn with_client(mut self, client: Client) -> Self {
275 self.underlying = Some(client);
276 self
277 }
278
279 pub fn build(self) -> Result<YfClient> {
281 let user_agent = self.user_agent.unwrap_or_else(|| {
282 USER_AGENTS
283 .choose(&mut rand::thread_rng())
284 .copied()
285 .unwrap_or(USER_AGENTS[0])
286 .to_string()
287 });
288
289 let inner = if let Some(c) = self.underlying {
290 c
291 } else {
292 Client::builder()
293 .cookie_store(true)
294 .gzip(true)
295 .timeout(self.timeout)
296 .user_agent(&user_agent)
297 .build()?
298 };
299
300 Ok(YfClient {
301 inner: Arc::new(Inner {
302 http: inner,
303 user_agent,
304 base_host: self.base_query_host,
305 crumb_url: self.crumb_url,
306 cookie_prime_url: self.cookie_prime_url,
307 isin_base_url: self
308 .isin_base_url
309 .unwrap_or_else(|| DEFAULT_ISIN_BASE.to_string()),
310 news_base_url: self
311 .news_base_url
312 .unwrap_or_else(|| DEFAULT_NEWS_BASE.to_string()),
313 quote_page_base_url: self
314 .quote_page_base_url
315 .unwrap_or_else(|| DEFAULT_QUOTE_PAGE_BASE.to_string()),
316 session_cookie: self.session_cookie,
317 session_crumb: self.session_crumb,
318 max_retries: self.max_retries,
319 retry_backoff: self.retry_backoff,
320 cache_ttl: self.cache_ttl,
321 api_preference: self.api_preference,
322 crumb_state: Mutex::new(()),
323 crumb: RwLock::new(None),
324 cache: RwLock::new(HashMap::new()),
325 }),
326 })
327 }
328}
329
330#[derive(Clone)]
335pub struct YfClient {
336 inner: Arc<Inner>,
337}
338
339impl std::fmt::Debug for YfClient {
340 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
341 f.debug_struct("YfClient")
342 .field("base_host", &self.inner.base_host)
343 .field("max_retries", &self.inner.max_retries)
344 .field("cache_ttl", &self.inner.cache_ttl)
345 .finish()
346 }
347}
348
349struct Inner {
350 http: Client,
351 user_agent: String,
352 base_host: String,
353 crumb_url: Option<String>,
354 cookie_prime_url: Option<String>,
355 isin_base_url: String,
356 news_base_url: String,
357 quote_page_base_url: String,
358 session_cookie: Option<String>,
359 session_crumb: Option<String>,
360 max_retries: u32,
361 retry_backoff: Duration,
362 cache_ttl: Duration,
363 api_preference: ApiPreference,
364 crumb_state: Mutex<()>,
366 crumb: RwLock<Option<CrumbState>>,
367 cache: RwLock<HashMap<String, CacheEntry>>,
368}
369
370#[derive(Clone)]
371struct CacheEntry {
372 body: bytes::Bytes,
373 inserted: Instant,
374}
375
/// A fetched crumb together with the time it was obtained.
#[derive(Clone, Debug)]
struct CrumbState {
    /// The crumb value sent as the `crumb` query parameter.
    crumb: String,
    /// Fetch time; the crumb is reused while its age is below `CRUMB_TTL`.
    fetched_at: Instant,
}
381
382impl YfClient {
383 pub fn new() -> Result<Self> {
385 YfClientBuilder::default().build()
386 }
387
388 pub fn builder() -> YfClientBuilder {
390 YfClientBuilder::default()
391 }
392
393 #[allow(dead_code)]
395 pub(crate) fn base_host(&self) -> &str {
396 &self.inner.base_host
397 }
398
399 pub(crate) fn isin_base_url(&self) -> &str {
401 &self.inner.isin_base_url
402 }
403
404 pub(crate) fn quote_page_base_url(&self) -> &str {
406 &self.inner.quote_page_base_url
407 }
408
409 pub fn api_preference(&self) -> ApiPreference {
411 self.inner.api_preference
412 }
413
414 pub(crate) fn news_post(&self, path: &str) -> reqwest::RequestBuilder {
417 let url = format!("{}{}", self.inner.news_base_url, path);
418 self.inner.http.post(url).headers(self.std_headers())
419 }
420
421 pub(crate) fn data_get(&self, path: &str) -> reqwest::RequestBuilder {
424 let url = format!("{}{}", self.inner.base_host, path);
425 self.inner.http.get(url).headers(self.std_headers())
426 }
427
428 pub(crate) fn raw_get(&self, url: &str) -> reqwest::RequestBuilder {
430 self.inner.http.get(url).headers(self.std_headers())
431 }
432
433 pub(crate) async fn send_text_recorded(
440 &self,
441 req: reqwest::RequestBuilder,
442 record_as: Option<(&str, &str)>,
443 ) -> Result<Option<String>> {
444 let resp = req.send().await?;
445 let status = resp.status();
446 let text = resp.text().await.unwrap_or_default();
447 if status.is_success() {
448 Self::maybe_record(record_as, text.as_bytes());
449 return Ok(Some(text));
450 }
451 if status == StatusCode::TOO_MANY_REQUESTS {
452 return Err(Error::RateLimited);
453 }
454 if status.is_server_error() || status.as_u16() == 404 {
455 return Err(Error::Status {
456 status: status.as_u16(),
457 message: text.chars().take(200).collect(),
458 });
459 }
460 Ok(None)
461 }
462
/// Percent-encodes `s` for use as a single URL path segment.
///
/// ASCII letters, digits and `.`, `-`, `=`, `^`, `_` pass through unchanged.
/// Every other byte of the string's UTF-8 encoding is written as `%XX` with
/// upper-case hex digits, so a non-ASCII character becomes one escape per
/// octet (for example `é` becomes `%C3%A9`).
pub(crate) fn path_encode(s: &str) -> String {
    use std::fmt::Write;

    let mut out = String::with_capacity(s.len());
    // Work on bytes, not chars: percent-encoding is defined over octets, and
    // formatting a code point directly would give malformed escapes such as
    // `%20AC` for `€`.
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'.'
            | b'-'
            | b'='
            | b'^'
            | b'_' => out.push(char::from(byte)),
            _ => {
                // Writing into a `String` cannot fail.
                let _ = write!(out, "%{byte:02X}");
            }
        }
    }
    out
}
473
474 fn std_headers(&self) -> HeaderMap {
475 let mut h = HeaderMap::new();
476 h.insert(
477 USER_AGENT,
478 HeaderValue::from_str(&self.inner.user_agent).unwrap(),
479 );
480 h.insert(
481 ACCEPT,
482 HeaderValue::from_static("application/json, text/plain, */*"),
483 );
484 h.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.9"));
485 h.insert(
486 REFERER,
487 HeaderValue::from_static("https://finance.yahoo.com/"),
488 );
489 if let Some(cookie) = self.inner.session_cookie.as_deref() {
490 if let Ok(v) = HeaderValue::from_str(cookie) {
491 h.insert(reqwest::header::COOKIE, v);
492 }
493 }
494 h
495 }
496
497 pub(crate) async fn get_json<T: DeserializeOwned>(
506 &self,
507 path: &str,
508 query: &[(&str, String)],
509 record_as: Option<(&str, &str)>,
510 ) -> Result<T> {
511 self.get_json_cached(path, query, record_as, CacheMode::Use)
512 .await
513 }
514
515 pub(crate) async fn get_json_cached<T: DeserializeOwned>(
518 &self,
519 path: &str,
520 query: &[(&str, String)],
521 record_as: Option<(&str, &str)>,
522 cache_mode: CacheMode,
523 ) -> Result<T> {
524 let body = self
525 .get_bytes(path, query, false, cache_mode)
526 .await?;
527 Self::maybe_record(record_as, &body);
528 serde_json::from_slice(&body).map_err(Error::from)
529 }
530
531 pub(crate) async fn get_json_crumb<T: DeserializeOwned>(
533 &self,
534 path: &str,
535 query: &[(&str, String)],
536 record_as: Option<(&str, &str)>,
537 ) -> Result<T> {
538 let body = self
539 .get_bytes(path, query, true, CacheMode::Use)
540 .await?;
541 Self::maybe_record(record_as, &body);
542 serde_json::from_slice(&body).map_err(Error::from)
543 }
544
545 pub(crate) async fn fetch_quote_summary(
549 &self,
550 symbol: &str,
551 modules: &str,
552 fixture_label: &str,
553 ) -> Result<Option<serde_json::Map<String, serde_json::Value>>> {
554 #[derive(serde::Deserialize)]
555 struct Envelope {
556 #[serde(rename = "quoteSummary")]
557 quote_summary: Inner,
558 }
559 #[derive(serde::Deserialize)]
560 struct Inner {
561 #[serde(default)]
562 result: Vec<serde_json::Map<String, serde_json::Value>>,
563 #[serde(default)]
564 error: Option<serde_json::Value>,
565 }
566
567 let path = format!("/v10/finance/quoteSummary/{}", Self::path_encode(symbol));
568 let q = vec![
569 ("modules", modules.to_string()),
570 ("formatted", "false".into()),
571 ("corsDomain", "finance.yahoo.com".into()),
572 ];
573 let env: Envelope = self
574 .get_json_crumb(&path, &q, Some((fixture_label, symbol)))
575 .await?;
576 if let Some(err) = env.quote_summary.error {
577 return Err(Error::Yahoo {
578 symbol: symbol.to_string(),
579 code: format!("{fixture_label}_error"),
580 description: err.to_string(),
581 });
582 }
583 Ok(env.quote_summary.result.into_iter().next())
584 }
585
/// Writes `body` as a JSON fixture when fixture recording is active.
///
/// Only does work when the `test-mode` feature is compiled in, a label is
/// given, recording is switched on and `body` is valid UTF-8. A failed write
/// is reported on stderr and otherwise ignored. Without the feature this is
/// a no-op.
fn maybe_record(label: Option<(&str, &str)>, body: &[u8]) {
    #[cfg(not(feature = "test-mode"))]
    {
        // Touch the arguments so they are not reported as unused.
        let _ = (label, body);
    }
    #[cfg(feature = "test-mode")]
    {
        let (endpoint, symbol) = match label {
            Some(pair) => pair,
            None => return,
        };
        if !crate::test_fixtures::is_recording() {
            return;
        }
        let text = match std::str::from_utf8(body) {
            Ok(valid) => valid,
            Err(_) => return,
        };
        if let Err(e) = crate::test_fixtures::record_fixture(endpoint, symbol, "json", text) {
            eprintln!("YF_RECORD: failed to write fixture for {symbol}: {e}");
        }
    }
}
606
607 async fn get_bytes(
608 &self,
609 path: &str,
610 query: &[(&str, String)],
611 needs_crumb: bool,
612 cache_mode: CacheMode,
613 ) -> Result<bytes::Bytes> {
614 let mut attempt: u32 = 0;
615 let mut backoff = self.inner.retry_backoff;
616
617 let mut q: Vec<(&str, String)> = query.iter().map(|(k, v)| (*k, v.clone())).collect();
618 if needs_crumb {
619 let crumb = self.ensure_crumb().await?;
620 q.push(("crumb", crumb));
621 }
622
623 let cache_active = self.inner.cache_ttl > Duration::ZERO && cache_mode != CacheMode::Bypass;
624 let cache_key =
625 cache_active.then(|| format!("{}{}?{}", self.inner.base_host, path, encode_query(&q)));
626 if let Some(key) = &cache_key {
627 if cache_mode == CacheMode::Use {
628 if let Some(body) = self.cache_lookup(key) {
629 return Ok(body);
630 }
631 }
632 }
633
634 loop {
635 let req = self.data_get(path).query(&q);
636 let res = req.send().await;
637 match res {
638 Ok(r) => match self.handle_response(r).await {
639 Ok(body) => {
640 if let Some(key) = &cache_key {
641 self.cache_store(key.clone(), body.clone());
642 }
643 return Ok(body);
644 }
645 Err(e) if attempt < self.inner.max_retries && is_retryable(&e) => {
646 log::debug!("retry {}: {}", attempt + 1, e);
647 sleep(backoff).await;
648 attempt += 1;
649 backoff = backoff.saturating_mul(2);
650 }
651 Err(e) => return Err(e),
652 },
653 Err(e) if attempt < self.inner.max_retries => {
654 log::debug!("retry {} (transport): {}", attempt + 1, e);
655 sleep(backoff).await;
656 attempt += 1;
657 backoff = backoff.saturating_mul(2);
658 }
659 Err(e) => return Err(e.into()),
660 }
661 }
662 }
663
664 async fn handle_response(&self, res: Response) -> Result<bytes::Bytes> {
665 let status = res.status();
666 if status.is_success() {
667 return Ok(res.bytes().await?);
668 }
669 if status == StatusCode::TOO_MANY_REQUESTS {
670 return Err(Error::RateLimited);
671 }
672 let body = res.text().await.unwrap_or_default();
673 let snippet = body.chars().take(200).collect::<String>();
674 Err(Error::Status {
675 status: status.as_u16(),
676 message: snippet,
677 })
678 }
679
680 fn cache_lookup(&self, key: &str) -> Option<bytes::Bytes> {
681 let entry = self.inner.cache.read().get(key).cloned()?;
682 if entry.inserted.elapsed() < self.inner.cache_ttl {
683 Some(entry.body)
684 } else {
685 None
686 }
687 }
688
689 fn cache_store(&self, key: String, body: bytes::Bytes) {
690 self.inner.cache.write().insert(
691 key,
692 CacheEntry {
693 body,
694 inserted: Instant::now(),
695 },
696 );
697 }
698
699 async fn ensure_crumb(&self) -> Result<String> {
701 if let Some(injected) = self.inner.session_crumb.clone() {
702 return Ok(injected);
703 }
704 if let Some(state) = self.inner.crumb.read().clone() {
705 if state.fetched_at.elapsed() < CRUMB_TTL {
706 return Ok(state.crumb);
707 }
708 }
709
710 let _guard = self.inner.crumb_state.lock().await;
713 if let Some(state) = self.inner.crumb.read().clone() {
715 if state.fetched_at.elapsed() < CRUMB_TTL {
716 return Ok(state.crumb);
717 }
718 }
719
720 let crumb = self.fetch_crumb().await?;
721 *self.inner.crumb.write() = Some(CrumbState {
722 crumb: crumb.clone(),
723 fetched_at: std::time::Instant::now(),
724 });
725 Ok(crumb)
726 }
727
728 async fn fetch_crumb(&self) -> Result<String> {
729 let prime = self
735 .inner
736 .cookie_prime_url
737 .as_deref()
738 .unwrap_or("https://fc.yahoo.com/consent");
739 if !prime.is_empty() {
740 let _ = self.raw_get(prime).send().await.ok();
741 }
742
743 let urls: Vec<String> = if let Some(u) = &self.inner.crumb_url {
745 vec![u.clone()]
746 } else {
747 vec![
748 format!("{}/v1/test/getcrumb", QUERY1_HOST),
749 format!("{}/v1/test/getcrumb", QUERY2_HOST),
750 ]
751 };
752 for url in &urls {
753 let res = self.raw_get(url).send().await;
754 let r = match res {
755 Ok(r) => r,
756 Err(_) => continue,
757 };
758 if !r.status().is_success() {
759 continue;
760 }
761 let text = r.text().await.unwrap_or_default();
762 let crumb = text.trim().trim_matches('"').to_string();
763 if !crumb.is_empty() && !crumb.contains('<') {
764 return Ok(crumb);
765 }
766 }
767
768 Err(Error::Auth(
769 "could not fetch crumb token from either query1 or query2".into(),
770 ))
771 }
772}
773
/// Joins query pairs as `key=value` separated by `&`, with no escaping.
///
/// The result is used to build cache keys, not request URLs.
fn encode_query(q: &[(&str, String)]) -> String {
    let pairs: Vec<String> = q
        .iter()
        .map(|(key, value)| format!("{key}={value}"))
        .collect();
    pairs.join("&")
}
785
786fn is_retryable(err: &Error) -> bool {
787 match err {
788 Error::Http(e) => {
789 e.is_timeout() || e.is_connect() || matches!(e.status(), Some(s) if s.is_server_error())
790 }
791 Error::Status { status, .. } => *status >= 500,
792 Error::RateLimited => true,
793 _ => false,
794 }
795}
796
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Error::Status` with an empty message for the given code.
    fn status_error(status: u16) -> Error {
        Error::Status {
            status,
            message: String::new(),
        }
    }

    /// A default client builds and points at a Yahoo host.
    #[tokio::test]
    async fn build_default() {
        let client = YfClient::new().expect("build");
        let host = client.base_host();
        assert!(host.contains("yahoo"));
    }

    /// Rate limiting and 5xx are retryable; 404 is not.
    #[test]
    fn retryable_classification() {
        assert!(is_retryable(&Error::RateLimited));
        assert!(is_retryable(&status_error(502)));
        assert!(!is_retryable(&status_error(404)));
    }
}
819}