1use feed_rs::parser;
32use futures::future::join_all;
33use serde::{Deserialize, Serialize};
34use std::collections::HashSet;
35use std::sync::OnceLock;
36
37use crate::error::{FinanceError, Result};
38
39static FEED_UA: OnceLock<String> = OnceLock::new();
48
49fn feed_user_agent() -> &'static str {
50 FEED_UA.get_or_init(|| {
51 match std::env::var("EDGAR_EMAIL") {
56 Ok(email) if !email.trim().is_empty() => {
57 format!(
58 "finance-query/{} ({})",
59 env!("CARGO_PKG_VERSION"),
60 email.trim()
61 )
62 }
63 _ => concat!(
64 "finance-query/",
65 env!("CARGO_PKG_VERSION"),
66 " (+https://github.com/Verdenroz/finance-query)"
67 )
68 .to_string(),
69 }
70 })
71}
72
73fn build_feed_client() -> reqwest::Client {
74 reqwest::Client::builder()
75 .user_agent(feed_user_agent())
76 .build()
77 .expect("failed to build feeds HTTP client")
78}
79
/// A news or regulatory feed that can be fetched.
///
/// Every built-in variant maps to a fixed feed URL (see [`FeedSource::url`])
/// and a display name (see [`FeedSource::name`]).
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FeedSource {
    /// Federal Reserve press releases.
    FederalReserve,
    /// SEC press releases.
    SecPressReleases,
    /// Current SEC EDGAR filings of the given form type (e.g. `"10-K"`), as Atom.
    SecFilings(String),
    /// MarketWatch top stories.
    MarketWatch,
    /// CNBC RSS feed.
    Cnbc,
    /// Bloomberg markets news.
    Bloomberg,
    /// Financial Times markets section.
    FinancialTimes,
    /// New York Times business section.
    NytBusiness,
    /// The Guardian business section.
    GuardianBusiness,
    /// Investing.com news.
    Investing,
    /// Bureau of Economic Analysis feed.
    Bea,
    /// European Central Bank press feed.
    Ecb,
    /// CFPB newsroom feed (consumerfinance.gov).
    Cfpb,
    /// Wall Street Journal markets feed.
    WsjMarkets,
    /// Fortune feed.
    Fortune,
    /// Business Wire home feed.
    BusinessWire,
    /// CoinDesk feed.
    CoinDesk,
    /// CoinTelegraph feed.
    CoinTelegraph,
    /// TechCrunch feed.
    TechCrunch,
    /// Hacker News via hnrss.org, requested with the `points=100` filter.
    HackerNews,
    /// OilPrice.com main feed.
    OilPrice,
    /// Calculated Risk (Substack) feed.
    CalculatedRisk,
    /// South China Morning Post feed.
    Scmp,
    /// Nikkei Asia feed.
    NikkeiAsia,
    /// Bank of England news feed.
    BankOfEngland,
    /// VentureBeat feed.
    VentureBeat,
    /// Y Combinator blog feed.
    YCombinator,
    /// The Economist economics section.
    TheEconomist,
    /// Financial Post feed.
    FinancialPost,
    /// Financial Times Lex column.
    FtLex,
    /// Ritholtz "The Big Picture" feed.
    RitholtzBigPicture,
    /// Any other feed, identified by its URL; the URL doubles as the display name.
    Custom(String),
}

impl FeedSource {
    /// Returns the feed URL for this source.
    ///
    /// For [`FeedSource::SecFilings`] the form type is percent-encoded before
    /// it is placed in the query string, so values containing spaces or query
    /// delimiters (`"SC 13D"`, `"a&b"`) produce a valid URL and cannot add or
    /// truncate query parameters. `/` and existing `%XX` escapes are left
    /// untouched. For [`FeedSource::Custom`] the stored URL is returned as-is.
    pub fn url(&self) -> String {
        let fixed: &str = match self {
            // The two data-carrying variants build their own string.
            Self::SecFilings(form_type) => {
                let form_type = Self::encode_query_value(form_type);
                return format!(
                    "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
                );
            }
            Self::Custom(url) => return url.clone(),

            Self::FederalReserve => "https://www.federalreserve.gov/feeds/press_all.xml",
            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss",
            Self::MarketWatch => "https://feeds.content.dowjones.io/public/rss/mw_topstories",
            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html",
            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss",
            Self::FinancialTimes => "https://www.ft.com/markets?format=rss",
            Self::NytBusiness => "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
            Self::GuardianBusiness => "https://www.theguardian.com/business/rss",
            Self::Investing => "https://www.investing.com/rss/news.rss",
            Self::Bea => "https://apps.bea.gov/rss/rss.xml",
            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html",
            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/",
            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml",
            Self::Fortune => "https://fortune.com/feed",
            Self::BusinessWire => "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw==",
            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/",
            Self::CoinTelegraph => "https://cointelegraph.com/rss",
            Self::TechCrunch => "https://techcrunch.com/feed/",
            Self::HackerNews => "https://hnrss.org/newest?points=100",
            Self::OilPrice => "https://oilprice.com/rss/main",
            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed",
            Self::Scmp => "https://www.scmp.com/rss/91/feed",
            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar",
            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news",
            Self::VentureBeat => "https://venturebeat.com/feed/",
            Self::YCombinator => "https://blog.ycombinator.com/feed/",
            Self::TheEconomist => "https://www.economist.com/sections/economics/rss.xml",
            Self::FinancialPost => "https://financialpost.com/feed",
            Self::FtLex => "https://www.ft.com/lex?format=rss",
            Self::RitholtzBigPicture => "https://ritholtz.com/feed/",
        };
        fixed.to_string()
    }

    /// Returns the human-readable name of this source.
    ///
    /// [`FeedSource::SecFilings`] includes the form type in the name and
    /// [`FeedSource::Custom`] uses its URL as the name.
    pub fn name(&self) -> String {
        let fixed: &str = match self {
            Self::SecFilings(form) => return format!("SEC EDGAR ({form})"),
            Self::Custom(url) => return url.clone(),

            Self::FederalReserve => "Federal Reserve",
            Self::SecPressReleases => "SEC",
            Self::MarketWatch => "MarketWatch",
            Self::Cnbc => "CNBC",
            Self::Bloomberg => "Bloomberg",
            Self::FinancialTimes => "Financial Times",
            Self::NytBusiness => "New York Times",
            Self::GuardianBusiness => "The Guardian",
            Self::Investing => "Investing.com",
            Self::Bea => "Bureau of Economic Analysis",
            Self::Ecb => "European Central Bank",
            Self::Cfpb => "CFPB",
            Self::WsjMarkets => "Wall Street Journal",
            Self::Fortune => "Fortune",
            Self::BusinessWire => "Business Wire",
            Self::CoinDesk => "CoinDesk",
            Self::CoinTelegraph => "CoinTelegraph",
            Self::TechCrunch => "TechCrunch",
            Self::HackerNews => "Hacker News",
            Self::OilPrice => "OilPrice.com",
            Self::CalculatedRisk => "Calculated Risk",
            Self::Scmp => "South China Morning Post",
            Self::NikkeiAsia => "Nikkei Asia",
            Self::BankOfEngland => "Bank of England",
            Self::VentureBeat => "VentureBeat",
            Self::YCombinator => "Y Combinator",
            Self::TheEconomist => "The Economist",
            Self::FinancialPost => "Financial Post",
            Self::FtLex => "Financial Times Lex",
            Self::RitholtzBigPicture => "The Big Picture",
        };
        fixed.to_string()
    }

    /// Percent-encodes `raw` so it can be used as a single query-string value.
    ///
    /// RFC 3986 unreserved characters and `/` are copied through. A `%` that
    /// already starts a valid `%XX` escape is kept so pre-encoded input is not
    /// double-encoded. Every other byte is written as `%XX` (uppercase hex).
    fn encode_query_value(raw: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";

        let bytes = raw.as_bytes();
        let mut encoded = String::with_capacity(raw.len());
        for (idx, &byte) in bytes.iter().enumerate() {
            let keep = match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' | b'/' => true,
                b'%' => matches!(
                    bytes.get(idx + 1..idx + 3),
                    Some([hi, lo]) if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
                ),
                _ => false,
            };
            if keep {
                encoded.push(char::from(byte));
            } else {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
        encoded
    }
}
239
240#[derive(Debug, Clone, Serialize, Deserialize)]
242#[non_exhaustive]
243pub struct FeedEntry {
244 pub title: String,
246 pub url: String,
248 pub published: Option<String>,
250 pub summary: Option<String>,
252 pub source: String,
254}
255
256pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
260 let client = build_feed_client();
261 fetch_with_client(&client, &source.url(), &source.name()).await
262}
263
264pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
272 let client = build_feed_client();
273 let pairs: Vec<(String, String)> = sources.iter().map(|s| (s.url(), s.name())).collect();
274 let futures: Vec<_> = pairs
275 .iter()
276 .map(|(url, name)| fetch_with_client(&client, url, name))
277 .collect();
278
279 let results = join_all(futures).await;
280
281 let mut seen_urls: HashSet<String> = HashSet::new();
282 let mut entries: Vec<FeedEntry> = results
283 .into_iter()
284 .flat_map(|r| r.unwrap_or_default())
285 .filter(|e| seen_urls.insert(e.url.clone()))
286 .collect();
287
288 entries.sort_by(|a, b| b.published.cmp(&a.published));
290
291 Ok(entries)
292}
293
294async fn fetch_with_client(
295 client: &reqwest::Client,
296 url: &str,
297 source_name: &str,
298) -> Result<Vec<FeedEntry>> {
299 let source = source_name.to_string();
300
301 let text = client
302 .get(url)
303 .send()
304 .await
305 .map_err(FinanceError::HttpError)?
306 .text()
307 .await
308 .map_err(FinanceError::HttpError)?;
309
310 let feed = parser::parse(text.as_bytes()).map_err(|e| FinanceError::FeedParseError {
311 url: url.to_string(),
312 context: e.to_string(),
313 })?;
314
315 let entries = feed
316 .entries
317 .into_iter()
318 .filter_map(|entry| {
319 let title = entry.title.map(|t| t.content)?.trim().to_string();
320 if title.is_empty() {
321 return None;
322 }
323
324 let url_str = entry
325 .links
326 .into_iter()
327 .next()
328 .map(|l| l.href)
329 .unwrap_or_default();
330
331 if url_str.is_empty() {
332 return None;
333 }
334
335 let published = entry.published.or(entry.updated).map(|dt| dt.to_rfc3339());
336
337 let summary = entry
338 .summary
339 .map(|s| s.content)
340 .or_else(|| entry.content.and_then(|c| c.body));
341
342 Some(FeedEntry {
343 title,
344 url: url_str,
345 published,
346 summary,
347 source: source.clone(),
348 })
349 })
350 .collect();
351
352 Ok(entries)
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetches `source` over the network, asserting success and a non-empty result.
    async fn fetch_non_empty(source: FeedSource) -> Vec<FeedEntry> {
        let entries = fetch(source).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
        entries
    }

    #[test]
    fn test_feed_source_urls() {
        // Sources that only need to expose an HTTPS URL.
        let secure = [
            FeedSource::FederalReserve,
            FeedSource::SecPressReleases,
            FeedSource::Bloomberg,
            FeedSource::FinancialTimes,
            FeedSource::NytBusiness,
            FeedSource::GuardianBusiness,
            FeedSource::Investing,
            FeedSource::Bea,
            FeedSource::Ecb,
            FeedSource::Cfpb,
        ];
        for source in &secure {
            assert!(source.url().starts_with("https://"));
        }

        // Parameterised sources carry their payload into the URL.
        let filings_url = FeedSource::SecFilings("10-K".to_string()).url();
        assert!(filings_url.contains("10-K"));
        assert_eq!(
            FeedSource::Custom("https://example.com/feed.rss".to_string()).url(),
            "https://example.com/feed.rss"
        );

        // Sources checked against the host they are expected to point at.
        let hosts = [
            (FeedSource::WsjMarkets, "dj.com"),
            (FeedSource::Fortune, "fortune.com"),
            (FeedSource::BusinessWire, "businesswire.com"),
            (FeedSource::CoinDesk, "coindesk.com"),
            (FeedSource::CoinTelegraph, "cointelegraph.com"),
            (FeedSource::TechCrunch, "techcrunch.com"),
            (FeedSource::HackerNews, "hnrss.org"),
        ];
        for (source, host) in hosts {
            assert!(source.url().contains(host));
        }
    }

    #[test]
    fn test_feed_source_names() {
        let expected = [
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
        ];
        for (source, name) in expected {
            assert_eq!(source.name(), name);
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let entries = fetch_non_empty(FeedSource::FederalReserve).await;
        for entry in entries.iter().take(3) {
            assert!(!entry.title.is_empty());
            assert!(!entry.url.is_empty());
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let entries = fetch_non_empty(FeedSource::Bloomberg).await;
        for entry in entries.iter().take(3) {
            assert!(!entry.title.is_empty());
            assert!(!entry.url.is_empty());
            assert_eq!(entry.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        fetch_non_empty(FeedSource::FinancialTimes).await;
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let entries = fetch_non_empty(FeedSource::Bea).await;
        assert_eq!(entries[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        fetch_non_empty(FeedSource::Ecb).await;
    }
}