1use feed_rs::parser;
32use futures::future::join_all;
33use serde::{Deserialize, Serialize};
34use std::collections::HashSet;
35use std::sync::OnceLock;
36use std::time::Duration;
37
38use crate::error::{FinanceError, Result};
39
/// Process-wide cache for the feeds `User-Agent` string.
///
/// Filled on the first call to `feed_user_agent` and reused afterwards.
static FEED_UA: OnceLock<String> = OnceLock::new();

/// Request timeout, in seconds, applied to the HTTP client created by `build_feed_client`.
const FEED_TIMEOUT_SECONDS: u64 = 30;
51
52fn feed_user_agent() -> &'static str {
53 FEED_UA.get_or_init(|| {
54 match std::env::var("EDGAR_EMAIL") {
59 Ok(email) if !email.trim().is_empty() => {
60 format!(
61 "finance-query/{} ({})",
62 env!("CARGO_PKG_VERSION"),
63 email.trim()
64 )
65 }
66 _ => concat!(
67 "finance-query/",
68 env!("CARGO_PKG_VERSION"),
69 " (+https://github.com/Verdenroz/finance-query)"
70 )
71 .to_string(),
72 }
73 })
74}
75
76fn build_feed_client() -> reqwest::Client {
77 reqwest::Client::builder()
78 .user_agent(feed_user_agent())
79 .timeout(Duration::from_secs(FEED_TIMEOUT_SECONDS))
80 .build()
81 .expect("failed to build feeds HTTP client")
82}
83
/// A news or regulatory feed that can be fetched.
///
/// Each built-in variant maps to a fixed feed URL (see [`FeedSource::url`]) and a
/// human-readable publisher name (see [`FeedSource::name`]).
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FeedSource {
    /// Federal Reserve press feed.
    FederalReserve,
    /// SEC press releases.
    SecPressReleases,
    /// SEC EDGAR current-filings Atom feed for the given form type (e.g. `"10-K"`).
    SecFilings(String),
    /// MarketWatch top stories.
    MarketWatch,
    /// CNBC RSS feed.
    Cnbc,
    /// Bloomberg markets news.
    Bloomberg,
    /// Financial Times markets section.
    FinancialTimes,
    /// New York Times business section.
    NytBusiness,
    /// The Guardian business section.
    GuardianBusiness,
    /// Investing.com news.
    Investing,
    /// Bureau of Economic Analysis feed.
    Bea,
    /// European Central Bank press feed.
    Ecb,
    /// Consumer Financial Protection Bureau newsroom.
    Cfpb,
    /// Wall Street Journal markets feed.
    WsjMarkets,
    /// Fortune feed.
    Fortune,
    /// Business Wire feed.
    BusinessWire,
    /// CoinDesk feed.
    CoinDesk,
    /// CoinTelegraph feed.
    CoinTelegraph,
    /// TechCrunch feed.
    TechCrunch,
    /// Hacker News via hnrss.org (`newest?points=100`).
    HackerNews,
    /// OilPrice.com main feed.
    OilPrice,
    /// Calculated Risk (Substack) feed.
    CalculatedRisk,
    /// South China Morning Post feed.
    Scmp,
    /// Nikkei Asia feed.
    NikkeiAsia,
    /// Bank of England news feed.
    BankOfEngland,
    /// VentureBeat feed.
    VentureBeat,
    /// Y Combinator blog feed.
    YCombinator,
    /// The Economist, economics section.
    TheEconomist,
    /// Financial Post feed.
    FinancialPost,
    /// Financial Times Lex feed.
    FtLex,
    /// "The Big Picture" feed from ritholtz.com.
    RitholtzBigPicture,
    /// Any other feed, identified by its URL. The URL doubles as the display name.
    Custom(String),
}

impl FeedSource {
    /// Returns the URL the feed is fetched from.
    ///
    /// For [`FeedSource::SecFilings`] the form type is percent-encoded before being
    /// placed in the query string, so values such as `"SC 13D"` yield a valid URL and
    /// characters like `&` or `#` cannot alter the rest of the query.
    /// For [`FeedSource::Custom`] the stored URL is returned unchanged.
    pub fn url(&self) -> String {
        let fixed: &str = match self {
            // The two data-carrying variants build their own `String`.
            Self::SecFilings(form_type) => {
                let form_type = encode_query_value(form_type);
                return format!(
                    "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
                );
            }
            Self::Custom(url) => return url.clone(),

            Self::FederalReserve => "https://www.federalreserve.gov/feeds/press_all.xml",
            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss",
            Self::MarketWatch => "https://feeds.content.dowjones.io/public/rss/mw_topstories",
            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html",
            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss",
            Self::FinancialTimes => "https://www.ft.com/markets?format=rss",
            Self::NytBusiness => "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
            Self::GuardianBusiness => "https://www.theguardian.com/business/rss",
            Self::Investing => "https://www.investing.com/rss/news.rss",
            Self::Bea => "https://apps.bea.gov/rss/rss.xml",
            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html",
            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/",
            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml",
            Self::Fortune => "https://fortune.com/feed",
            Self::BusinessWire => {
                "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw=="
            }
            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/",
            Self::CoinTelegraph => "https://cointelegraph.com/rss",
            Self::TechCrunch => "https://techcrunch.com/feed/",
            Self::HackerNews => "https://hnrss.org/newest?points=100",
            Self::OilPrice => "https://oilprice.com/rss/main",
            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed",
            Self::Scmp => "https://www.scmp.com/rss/91/feed",
            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar",
            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news",
            Self::VentureBeat => "https://venturebeat.com/feed/",
            Self::YCombinator => "https://blog.ycombinator.com/feed/",
            Self::TheEconomist => "https://www.economist.com/sections/economics/rss.xml",
            Self::FinancialPost => "https://financialpost.com/feed",
            Self::FtLex => "https://www.ft.com/lex?format=rss",
            Self::RitholtzBigPicture => "https://ritholtz.com/feed/",
        };
        fixed.to_string()
    }

    /// Returns the human-readable name of the source.
    ///
    /// [`FeedSource::SecFilings`] includes the form type in the name, and
    /// [`FeedSource::Custom`] uses its URL as the name.
    pub fn name(&self) -> String {
        let fixed: &str = match self {
            Self::SecFilings(form) => return format!("SEC EDGAR ({form})"),
            Self::Custom(url) => return url.clone(),

            Self::FederalReserve => "Federal Reserve",
            Self::SecPressReleases => "SEC",
            Self::MarketWatch => "MarketWatch",
            Self::Cnbc => "CNBC",
            Self::Bloomberg => "Bloomberg",
            Self::FinancialTimes => "Financial Times",
            Self::NytBusiness => "New York Times",
            Self::GuardianBusiness => "The Guardian",
            Self::Investing => "Investing.com",
            Self::Bea => "Bureau of Economic Analysis",
            Self::Ecb => "European Central Bank",
            Self::Cfpb => "CFPB",
            Self::WsjMarkets => "Wall Street Journal",
            Self::Fortune => "Fortune",
            Self::BusinessWire => "Business Wire",
            Self::CoinDesk => "CoinDesk",
            Self::CoinTelegraph => "CoinTelegraph",
            Self::TechCrunch => "TechCrunch",
            Self::HackerNews => "Hacker News",
            Self::OilPrice => "OilPrice.com",
            Self::CalculatedRisk => "Calculated Risk",
            Self::Scmp => "South China Morning Post",
            Self::NikkeiAsia => "Nikkei Asia",
            Self::BankOfEngland => "Bank of England",
            Self::VentureBeat => "VentureBeat",
            Self::YCombinator => "Y Combinator",
            Self::TheEconomist => "The Economist",
            Self::FinancialPost => "Financial Post",
            Self::FtLex => "Financial Times Lex",
            Self::RitholtzBigPicture => "The Big Picture",
        };
        fixed.to_string()
    }
}

/// Percent-encodes `raw` for use as a single URL query value.
///
/// RFC 3986 unreserved characters and `/` are kept as-is. A `%` that already starts a
/// valid `%XX` escape is also kept, so input that was encoded by the caller is not
/// encoded a second time. Every other byte is written as an uppercase `%XX` escape.
fn encode_query_value(raw: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let bytes = raw.as_bytes();
    let mut encoded = String::with_capacity(bytes.len());
    for (idx, &byte) in bytes.iter().enumerate() {
        let keep = match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' | b'/' => true,
            b'%' => matches!(
                bytes.get(idx + 1..idx + 3),
                Some([hi, lo]) if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
            ),
            _ => false,
        };

        if keep {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
243
/// A single item taken from a fetched feed.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct FeedEntry {
    /// Trimmed item title; entries without a title are never produced.
    pub title: String,
    /// Link of the item (the first link listed in the feed entry).
    pub url: String,
    /// Publication time as an RFC 3339 string; falls back to the entry's "updated"
    /// time and is `None` when the feed supplies neither.
    pub published: Option<String>,
    /// Item summary, or the entry's content body when no summary is present.
    pub summary: Option<String>,
    /// Display name of the source the entry came from.
    pub source: String,
}
259
260pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
264 let client = build_feed_client();
265 fetch_with_client(&client, &source.url(), &source.name()).await
266}
267
268pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
276 let client = build_feed_client();
277 let pairs: Vec<(String, String)> = sources.iter().map(|s| (s.url(), s.name())).collect();
278 let futures: Vec<_> = pairs
279 .iter()
280 .map(|(url, name)| fetch_with_client(&client, url, name))
281 .collect();
282
283 let results = join_all(futures).await;
284
285 let mut seen_urls: HashSet<String> = HashSet::new();
286 let mut entries: Vec<FeedEntry> = results
287 .into_iter()
288 .flat_map(|r| r.unwrap_or_default())
289 .filter(|e| seen_urls.insert(e.url.clone()))
290 .collect();
291
292 entries.sort_by(|a, b| b.published.cmp(&a.published));
294
295 Ok(entries)
296}
297
298async fn fetch_with_client(
299 client: &reqwest::Client,
300 url: &str,
301 source_name: &str,
302) -> Result<Vec<FeedEntry>> {
303 let source = source_name.to_string();
304
305 let text = client.get(url).send().await?.text().await?;
306
307 let feed = parser::parse(text.as_bytes()).map_err(|e| FinanceError::FeedParseError {
308 url: url.to_string(),
309 context: e.to_string(),
310 })?;
311
312 let entries = feed
313 .entries
314 .into_iter()
315 .filter_map(|entry| {
316 let title = entry.title.map(|t| t.content)?.trim().to_string();
317 if title.is_empty() {
318 return None;
319 }
320
321 let url_str = entry
322 .links
323 .into_iter()
324 .next()
325 .map(|l| l.href)
326 .unwrap_or_default();
327
328 if url_str.is_empty() {
329 return None;
330 }
331
332 let published = entry.published.or(entry.updated).map(|dt| dt.to_rfc3339());
333
334 let summary = entry
335 .summary
336 .map(|s| s.content)
337 .or_else(|| entry.content.and_then(|c| c.body));
338
339 Some(FeedEntry {
340 title,
341 url: url_str,
342 published,
343 summary,
344 source: source.clone(),
345 })
346 })
347 .collect();
348
349 Ok(entries)
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetches `source` over the network and asserts the call succeeded with at least
    /// one entry, returning the entries for further checks.
    async fn fetch_non_empty(source: FeedSource) -> Vec<FeedEntry> {
        let outcome = fetch(source).await;
        assert!(outcome.is_ok(), "Expected ok, got: {:?}", outcome.err());
        let entries = outcome.unwrap();
        assert!(!entries.is_empty());
        entries
    }

    #[test]
    fn test_feed_source_urls() {
        // Sources whose URL only needs to be HTTPS.
        let https_sources = [
            FeedSource::FederalReserve,
            FeedSource::SecPressReleases,
            FeedSource::Bloomberg,
            FeedSource::FinancialTimes,
            FeedSource::NytBusiness,
            FeedSource::GuardianBusiness,
            FeedSource::Investing,
            FeedSource::Bea,
            FeedSource::Ecb,
            FeedSource::Cfpb,
        ];
        for source in &https_sources {
            assert!(source.url().starts_with("https://"));
        }

        let filings = FeedSource::SecFilings("10-K".to_string());
        assert!(filings.url().contains("10-K"));

        let custom = FeedSource::Custom("https://example.com/feed.rss".to_string());
        assert_eq!(custom.url(), "https://example.com/feed.rss");

        // Sources whose URL must point at a specific host.
        let expected_hosts = [
            (FeedSource::WsjMarkets, "dj.com"),
            (FeedSource::Fortune, "fortune.com"),
            (FeedSource::BusinessWire, "businesswire.com"),
            (FeedSource::CoinDesk, "coindesk.com"),
            (FeedSource::CoinTelegraph, "cointelegraph.com"),
            (FeedSource::TechCrunch, "techcrunch.com"),
            (FeedSource::HackerNews, "hnrss.org"),
        ];
        for (source, host) in &expected_hosts {
            assert!(source.url().contains(*host));
        }
    }

    #[test]
    fn test_feed_source_names() {
        let expected_names = [
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
        ];
        for (source, expected) in &expected_names {
            assert_eq!(source.name(), *expected);
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let entries = fetch_non_empty(FeedSource::FederalReserve).await;
        for entry in entries.iter().take(3) {
            assert!(!entry.title.is_empty());
            assert!(!entry.url.is_empty());
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let entries = fetch_non_empty(FeedSource::Bloomberg).await;
        for entry in entries.iter().take(3) {
            assert!(!entry.title.is_empty());
            assert!(!entry.url.is_empty());
            assert_eq!(entry.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        fetch_non_empty(FeedSource::FinancialTimes).await;
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let entries = fetch_non_empty(FeedSource::Bea).await;
        assert_eq!(entries[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        fetch_non_empty(FeedSource::Ecb).await;
    }
}