Skip to main content

finance_query/feeds/
mod.rs

1//! RSS/Atom news feed aggregation.
2//!
3//! Requires the **`rss`** feature flag.
4//!
5//! Fetches and parses RSS/Atom feeds from named financial sources or arbitrary URLs.
6//! Multiple feeds can be fetched and merged in one call with automatic deduplication.
7//!
8//! # Quick Start
9//!
10//! ```no_run
11//! use finance_query::feeds::{self, FeedSource};
12//!
13//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
14//! // Fetch Federal Reserve press releases
15//! let fed_news = feeds::fetch(FeedSource::FederalReserve).await?;
16//! for entry in fed_news.iter().take(5) {
17//!     println!("{}: {}", entry.published.as_deref().unwrap_or("?"), entry.title);
18//! }
19//!
20//! // Aggregate multiple sources
21//! let news = feeds::fetch_all(&[
22//!     FeedSource::FederalReserve,
23//!     FeedSource::SecPressReleases,
24//!     FeedSource::MarketWatch,
25//! ]).await?;
26//! println!("Total entries: {}", news.len());
27//! # Ok(())
28//! # }
29//! ```
30
31use feed_rs::parser;
32use futures::future::join_all;
33use serde::{Deserialize, Serialize};
34use std::collections::HashSet;
35use std::sync::OnceLock;
36
37use crate::error::{FinanceError, Result};
38
39/// Cached User-Agent string, computed once from the environment.
40///
41/// Only the configuration (UA string) is stored as a singleton — not the
42/// `reqwest::Client` itself. `reqwest::Client` internally spawns hyper
43/// connection-pool tasks on whichever tokio runtime first uses it; when that
44/// runtime is dropped (e.g. after a `#[tokio::test]`), those tasks die and
45/// subsequent calls from a different runtime receive `DispatchGone`. Caching
46/// only the UA avoids this while still computing the environment lookup once.
47static FEED_UA: OnceLock<String> = OnceLock::new();
48
49fn feed_user_agent() -> &'static str {
50    FEED_UA.get_or_init(|| {
51        // SEC EDGAR requires "app/version (email)" — nothing else in the UA.
52        // Other sites accept any reasonable UA. We use the email format when
53        // EDGAR_EMAIL is set (same env var as the edgar module), falling back
54        // to a github URL for environments without EDGAR configured.
55        match std::env::var("EDGAR_EMAIL") {
56            Ok(email) if !email.trim().is_empty() => {
57                format!(
58                    "finance-query/{} ({})",
59                    env!("CARGO_PKG_VERSION"),
60                    email.trim()
61                )
62            }
63            _ => concat!(
64                "finance-query/",
65                env!("CARGO_PKG_VERSION"),
66                " (+https://github.com/Verdenroz/finance-query)"
67            )
68            .to_string(),
69        }
70    })
71}
72
73fn build_feed_client() -> reqwest::Client {
74    reqwest::Client::builder()
75        .user_agent(feed_user_agent())
76        .build()
77        .expect("failed to build feeds HTTP client")
78}
79
/// A named or custom RSS/Atom feed source.
///
/// Each variant resolves to a feed URL through [`FeedSource::url`] and to a
/// display label through [`FeedSource::name`]. Variants without a payload map
/// to fixed URLs; [`FeedSource::SecFilings`] and [`FeedSource::Custom`] carry
/// caller-supplied strings.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FeedSource {
    /// Federal Reserve press releases and speeches
    FederalReserve,
    /// SEC press releases (enforcement actions, rule changes)
    SecPressReleases,
    /// SEC EDGAR filing feed — specify form type (e.g., `"10-K"`, `"8-K"`).
    ///
    /// The string is inserted as-is into the `type` query parameter of the
    /// EDGAR URL and into the display name.
    SecFilings(String),
    /// MarketWatch top stories
    MarketWatch,
    /// CNBC Markets
    Cnbc,
    /// Bloomberg Markets news
    Bloomberg,
    /// Financial Times Markets section
    FinancialTimes,
    /// The New York Times Business section
    NytBusiness,
    /// The Guardian Business section
    GuardianBusiness,
    /// Investing.com all news
    Investing,
    /// U.S. Bureau of Economic Analysis data releases
    Bea,
    /// European Central Bank press releases and speeches
    Ecb,
    /// Consumer Financial Protection Bureau newsroom
    Cfpb,
    /// Wall Street Journal Markets top stories
    WsjMarkets,
    /// Fortune — business and finance news
    Fortune,
    /// Business Wire — official corporate press releases (earnings, dividends, M&A)
    BusinessWire,
    /// CoinDesk — cryptocurrency and blockchain news
    CoinDesk,
    /// CoinTelegraph — cryptocurrency news and analysis
    CoinTelegraph,
    /// TechCrunch — startup, VC, and tech industry news
    TechCrunch,
    /// Hacker News — community-curated tech posts with 100+ points
    HackerNews,
    /// OilPrice.com — crude oil, natural gas, and energy geopolitics
    OilPrice,
    /// Calculated Risk — housing starts, mortgage rates, and macro data
    CalculatedRisk,
    /// South China Morning Post — China business, regulation, and trade
    Scmp,
    /// Nikkei Asia — Japanese and Asian business news
    NikkeiAsia,
    /// Bank of England — UK monetary policy, rate decisions, and regulatory notices
    BankOfEngland,
    /// VentureBeat — AI funding rounds and enterprise technology
    VentureBeat,
    /// Y Combinator Blog — startup ecosystem announcements (low-frequency)
    YCombinator,
    /// The Economist — global economics and market analysis
    TheEconomist,
    /// Financial Post — Canadian market and business news
    FinancialPost,
    /// Financial Times Lex — short daily market commentary column
    FtLex,
    /// The Big Picture (Ritholtz) — macro finance analysis and commentary
    RitholtzBigPicture,
    /// Custom feed URL.
    ///
    /// The string is used verbatim both as the URL to fetch and as the
    /// source name.
    Custom(String),
}
149
150impl FeedSource {
151    /// Return the URL for this feed source.
152    pub fn url(&self) -> String {
153        match self {
154            Self::FederalReserve => {
155                "https://www.federalreserve.gov/feeds/press_all.xml".to_string()
156            }
157            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss".to_string(),
158            Self::SecFilings(form_type) => format!(
159                "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
160            ),
161            Self::MarketWatch => {
162                "https://feeds.content.dowjones.io/public/rss/mw_topstories".to_string()
163            }
164            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html".to_string(),
165            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss".to_string(),
166            Self::FinancialTimes => "https://www.ft.com/markets?format=rss".to_string(),
167            Self::NytBusiness => {
168                "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml".to_string()
169            }
170            Self::GuardianBusiness => "https://www.theguardian.com/business/rss".to_string(),
171            Self::Investing => "https://www.investing.com/rss/news.rss".to_string(),
172            Self::Bea => "https://apps.bea.gov/rss/rss.xml".to_string(),
173            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html".to_string(),
174            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/".to_string(),
175            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml".to_string(),
176            Self::Fortune => "https://fortune.com/feed".to_string(),
177            Self::BusinessWire => {
178                "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw==".to_string()
179            }
180            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/".to_string(),
181            Self::CoinTelegraph => "https://cointelegraph.com/rss".to_string(),
182            Self::TechCrunch => "https://techcrunch.com/feed/".to_string(),
183            Self::HackerNews => "https://hnrss.org/newest?points=100".to_string(),
184            Self::OilPrice => "https://oilprice.com/rss/main".to_string(),
185            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed".to_string(),
186            Self::Scmp => "https://www.scmp.com/rss/91/feed".to_string(),
187            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar".to_string(),
188            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news".to_string(),
189            Self::VentureBeat => "https://venturebeat.com/feed/".to_string(),
190            Self::YCombinator => "https://blog.ycombinator.com/feed/".to_string(),
191            Self::TheEconomist => {
192                "https://www.economist.com/sections/economics/rss.xml".to_string()
193            }
194            Self::FinancialPost => "https://financialpost.com/feed".to_string(),
195            Self::FtLex => "https://www.ft.com/lex?format=rss".to_string(),
196            Self::RitholtzBigPicture => "https://ritholtz.com/feed/".to_string(),
197            Self::Custom(url) => url.clone(),
198        }
199    }
200
201    /// Human-readable source name, used in [`FeedEntry::source`].
202    pub fn name(&self) -> String {
203        match self {
204            Self::FederalReserve => "Federal Reserve".to_string(),
205            Self::SecPressReleases => "SEC".to_string(),
206            Self::SecFilings(form) => format!("SEC EDGAR ({form})"),
207            Self::MarketWatch => "MarketWatch".to_string(),
208            Self::Cnbc => "CNBC".to_string(),
209            Self::Bloomberg => "Bloomberg".to_string(),
210            Self::FinancialTimes => "Financial Times".to_string(),
211            Self::NytBusiness => "New York Times".to_string(),
212            Self::GuardianBusiness => "The Guardian".to_string(),
213            Self::Investing => "Investing.com".to_string(),
214            Self::Bea => "Bureau of Economic Analysis".to_string(),
215            Self::Ecb => "European Central Bank".to_string(),
216            Self::Cfpb => "CFPB".to_string(),
217            Self::WsjMarkets => "Wall Street Journal".to_string(),
218            Self::Fortune => "Fortune".to_string(),
219            Self::BusinessWire => "Business Wire".to_string(),
220            Self::CoinDesk => "CoinDesk".to_string(),
221            Self::CoinTelegraph => "CoinTelegraph".to_string(),
222            Self::TechCrunch => "TechCrunch".to_string(),
223            Self::HackerNews => "Hacker News".to_string(),
224            Self::OilPrice => "OilPrice.com".to_string(),
225            Self::CalculatedRisk => "Calculated Risk".to_string(),
226            Self::Scmp => "South China Morning Post".to_string(),
227            Self::NikkeiAsia => "Nikkei Asia".to_string(),
228            Self::BankOfEngland => "Bank of England".to_string(),
229            Self::VentureBeat => "VentureBeat".to_string(),
230            Self::YCombinator => "Y Combinator".to_string(),
231            Self::TheEconomist => "The Economist".to_string(),
232            Self::FinancialPost => "Financial Post".to_string(),
233            Self::FtLex => "Financial Times Lex".to_string(),
234            Self::RitholtzBigPicture => "The Big Picture".to_string(),
235            Self::Custom(url) => url.clone(),
236        }
237    }
238}
239
/// A single entry from an RSS/Atom feed.
///
/// Produced by [`fetch`] and [`fetch_all`]; entries lacking a non-blank title
/// or a link are never constructed.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct FeedEntry {
    /// Article or item title, with surrounding whitespace trimmed
    pub title: String,
    /// Link to the article, taken from the feed entry's links
    pub url: String,
    /// Publication date/time as an RFC 3339 string (if available); falls back
    /// to the entry's "updated" timestamp when no publication date is present
    pub published: Option<String>,
    /// Short summary or description; falls back to the entry's content body
    /// when the feed provides no summary
    pub summary: Option<String>,
    /// Name of the feed source, as returned by [`FeedSource::name`]
    pub source: String,
}
255
256/// Fetch and parse a single feed source.
257///
258/// Returns an empty `Vec` (not an error) when the feed is reachable but empty.
259pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
260    let client = build_feed_client();
261    fetch_with_client(&client, &source.url(), &source.name()).await
262}
263
264/// Fetch multiple feed sources concurrently and merge the results.
265///
266/// Results are deduplicated by URL and sorted newest-first when dates are available.
267/// Feeds that fail individually are skipped (not propagated as errors).
268///
269/// A single `reqwest::Client` is shared across all concurrent fetches within
270/// this call, reusing connection pools and TLS state.
271pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
272    let client = build_feed_client();
273    let pairs: Vec<(String, String)> = sources.iter().map(|s| (s.url(), s.name())).collect();
274    let futures: Vec<_> = pairs
275        .iter()
276        .map(|(url, name)| fetch_with_client(&client, url, name))
277        .collect();
278
279    let results = join_all(futures).await;
280
281    let mut seen_urls: HashSet<String> = HashSet::new();
282    let mut entries: Vec<FeedEntry> = results
283        .into_iter()
284        .flat_map(|r| r.unwrap_or_default())
285        .filter(|e| seen_urls.insert(e.url.clone()))
286        .collect();
287
288    // Sort newest-first where dates are present
289    entries.sort_by(|a, b| b.published.cmp(&a.published));
290
291    Ok(entries)
292}
293
294async fn fetch_with_client(
295    client: &reqwest::Client,
296    url: &str,
297    source_name: &str,
298) -> Result<Vec<FeedEntry>> {
299    let source = source_name.to_string();
300
301    let text = client
302        .get(url)
303        .send()
304        .await
305        .map_err(FinanceError::HttpError)?
306        .text()
307        .await
308        .map_err(FinanceError::HttpError)?;
309
310    let feed = parser::parse(text.as_bytes()).map_err(|e| FinanceError::FeedParseError {
311        url: url.to_string(),
312        context: e.to_string(),
313    })?;
314
315    let entries = feed
316        .entries
317        .into_iter()
318        .filter_map(|entry| {
319            let title = entry.title.map(|t| t.content)?.trim().to_string();
320            if title.is_empty() {
321                return None;
322            }
323
324            let url_str = entry
325                .links
326                .into_iter()
327                .next()
328                .map(|l| l.href)
329                .unwrap_or_default();
330
331            if url_str.is_empty() {
332                return None;
333            }
334
335            let published = entry.published.or(entry.updated).map(|dt| dt.to_rfc3339());
336
337            let summary = entry
338                .summary
339                .map(|s| s.content)
340                .or_else(|| entry.content.and_then(|c| c.body));
341
342            Some(FeedEntry {
343                title,
344                url: url_str,
345                published,
346                summary,
347                source: source.clone(),
348            })
349        })
350        .collect();
351
352    Ok(entries)
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Performs a live fetch of `source`, asserting that it succeeded and
    /// returned at least one entry, then hands the entries back.
    async fn fetch_non_empty(source: FeedSource) -> Vec<FeedEntry> {
        let outcome = fetch(source).await;
        assert!(outcome.is_ok(), "Expected ok, got: {:?}", outcome.err());
        let entries = outcome.unwrap();
        assert!(!entries.is_empty());
        entries
    }

    #[test]
    fn test_feed_source_urls() {
        let https_sources = [
            FeedSource::FederalReserve,
            FeedSource::SecPressReleases,
            FeedSource::Bloomberg,
            FeedSource::FinancialTimes,
            FeedSource::NytBusiness,
            FeedSource::GuardianBusiness,
            FeedSource::Investing,
            FeedSource::Bea,
            FeedSource::Ecb,
            FeedSource::Cfpb,
        ];
        for source in &https_sources {
            assert!(source.url().starts_with("https://"));
        }

        let filings = FeedSource::SecFilings("10-K".to_string());
        assert!(filings.url().contains("10-K"));

        let custom = FeedSource::Custom("https://example.com/feed.rss".to_string());
        assert_eq!(custom.url(), "https://example.com/feed.rss");

        // New sources
        let expected_hosts = [
            (FeedSource::WsjMarkets, "dj.com"),
            (FeedSource::Fortune, "fortune.com"),
            (FeedSource::BusinessWire, "businesswire.com"),
            (FeedSource::CoinDesk, "coindesk.com"),
            (FeedSource::CoinTelegraph, "cointelegraph.com"),
            (FeedSource::TechCrunch, "techcrunch.com"),
            (FeedSource::HackerNews, "hnrss.org"),
        ];
        for (source, host) in &expected_hosts {
            assert!(source.url().contains(*host));
        }
    }

    #[test]
    fn test_feed_source_names() {
        let expected_names = [
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            // New sources
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
        ];
        for (source, expected) in &expected_names {
            assert_eq!(source.name(), *expected);
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let entries = fetch_non_empty(FeedSource::FederalReserve).await;
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let entries = fetch_non_empty(FeedSource::Bloomberg).await;
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
            assert_eq!(e.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        fetch_non_empty(FeedSource::FinancialTimes).await;
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let entries = fetch_non_empty(FeedSource::Bea).await;
        assert_eq!(entries[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        fetch_non_empty(FeedSource::Ecb).await;
    }
}
466        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
467        let entries = entries.unwrap();
468        assert!(!entries.is_empty());
469    }
470}