Skip to main content

finance_query/feeds/
mod.rs

1//! RSS/Atom news feed aggregation.
2//!
3//! Requires the **`rss`** feature flag.
4//!
5//! Fetches and parses RSS/Atom feeds from named financial sources or arbitrary URLs.
6//! Multiple feeds can be fetched and merged in one call with automatic deduplication.
7//!
8//! # Quick Start
9//!
10//! ```no_run
11//! use finance_query::feeds::{self, FeedSource};
12//!
13//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
14//! // Fetch Federal Reserve press releases
15//! let fed_news = feeds::fetch(FeedSource::FederalReserve).await?;
16//! for entry in fed_news.iter().take(5) {
17//!     println!("{}: {}", entry.published.as_deref().unwrap_or("?"), entry.title);
18//! }
19//!
20//! // Aggregate multiple sources
21//! let news = feeds::fetch_all(&[
22//!     FeedSource::FederalReserve,
23//!     FeedSource::SecPressReleases,
24//!     FeedSource::MarketWatch,
25//! ]).await?;
26//! println!("Total entries: {}", news.len());
27//! # Ok(())
28//! # }
29//! ```
30
31use feed_rs::parser;
32use futures::future::join_all;
33use serde::{Deserialize, Serialize};
34use std::collections::HashSet;
35use std::sync::OnceLock;
36use std::time::Duration;
37
38use crate::error::{FinanceError, Result};
39
40/// Cached User-Agent string, computed once from the environment.
41///
42/// Only the configuration (UA string) is stored as a singleton — not the
43/// `reqwest::Client` itself. `reqwest::Client` internally spawns hyper
44/// connection-pool tasks on whichever tokio runtime first uses it; when that
45/// runtime is dropped (e.g. after a `#[tokio::test]`), those tasks die and
46/// subsequent calls from a different runtime receive `DispatchGone`. Caching
47/// only the UA avoids this while still computing the environment lookup once.
48static FEED_UA: OnceLock<String> = OnceLock::new();
49
50const FEED_TIMEOUT_SECONDS: u64 = 30;
51
52fn feed_user_agent() -> &'static str {
53    FEED_UA.get_or_init(|| {
54        // SEC EDGAR requires "app/version (email)" — nothing else in the UA.
55        // Other sites accept any reasonable UA. We use the email format when
56        // EDGAR_EMAIL is set (same env var as the edgar module), falling back
57        // to a github URL for environments without EDGAR configured.
58        match std::env::var("EDGAR_EMAIL") {
59            Ok(email) if !email.trim().is_empty() => {
60                format!(
61                    "finance-query/{} ({})",
62                    env!("CARGO_PKG_VERSION"),
63                    email.trim()
64                )
65            }
66            _ => concat!(
67                "finance-query/",
68                env!("CARGO_PKG_VERSION"),
69                " (+https://github.com/Verdenroz/finance-query)"
70            )
71            .to_string(),
72        }
73    })
74}
75
76fn build_feed_client() -> reqwest::Client {
77    reqwest::Client::builder()
78        .user_agent(feed_user_agent())
79        .timeout(Duration::from_secs(FEED_TIMEOUT_SECONDS))
80        .build()
81        .expect("failed to build feeds HTTP client")
82}
83
/// A named or custom RSS/Atom feed source.
///
/// Besides `Debug` and `Clone`, the enum implements `PartialEq`, `Eq`, and
/// `Hash` so callers can compare sources and keep them in hashed collections
/// (for example to drop duplicate sources before fetching). Two
/// [`FeedSource::SecFilings`] or [`FeedSource::Custom`] values are equal only
/// when their wrapped strings are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum FeedSource {
    /// Federal Reserve press releases and speeches
    FederalReserve,
    /// SEC press releases (enforcement actions, rule changes)
    SecPressReleases,
    /// SEC EDGAR filing feed — specify form type (e.g., `"10-K"`, `"8-K"`)
    SecFilings(String),
    /// MarketWatch top stories
    MarketWatch,
    /// CNBC Markets
    Cnbc,
    /// Bloomberg Markets news
    Bloomberg,
    /// Financial Times Markets section
    FinancialTimes,
    /// The New York Times Business section
    NytBusiness,
    /// The Guardian Business section
    GuardianBusiness,
    /// Investing.com all news
    Investing,
    /// U.S. Bureau of Economic Analysis data releases
    Bea,
    /// European Central Bank press releases and speeches
    Ecb,
    /// Consumer Financial Protection Bureau newsroom
    Cfpb,
    /// Wall Street Journal Markets top stories
    WsjMarkets,
    /// Fortune — business and finance news
    Fortune,
    /// Business Wire — official corporate press releases (earnings, dividends, M&A)
    BusinessWire,
    /// CoinDesk — cryptocurrency and blockchain news
    CoinDesk,
    /// CoinTelegraph — cryptocurrency news and analysis
    CoinTelegraph,
    /// TechCrunch — startup, VC, and tech industry news
    TechCrunch,
    /// Hacker News — community-curated tech posts with 100+ points
    HackerNews,
    /// OilPrice.com — crude oil, natural gas, and energy geopolitics
    OilPrice,
    /// Calculated Risk — housing starts, mortgage rates, and macro data
    CalculatedRisk,
    /// South China Morning Post — China business, regulation, and trade
    Scmp,
    /// Nikkei Asia — Japanese and Asian business news
    NikkeiAsia,
    /// Bank of England — UK monetary policy, rate decisions, and regulatory notices
    BankOfEngland,
    /// VentureBeat — AI funding rounds and enterprise technology
    VentureBeat,
    /// Y Combinator Blog — startup ecosystem announcements (low-frequency)
    YCombinator,
    /// The Economist — global economics and market analysis
    TheEconomist,
    /// Financial Post — Canadian market and business news
    FinancialPost,
    /// Financial Times Lex — short daily market commentary column
    FtLex,
    /// The Big Picture (Ritholtz) — macro finance analysis and commentary
    RitholtzBigPicture,
    /// Custom feed URL
    Custom(String),
}
153
154impl FeedSource {
155    /// Return the URL for this feed source.
156    pub fn url(&self) -> String {
157        match self {
158            Self::FederalReserve => {
159                "https://www.federalreserve.gov/feeds/press_all.xml".to_string()
160            }
161            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss".to_string(),
162            Self::SecFilings(form_type) => format!(
163                "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
164            ),
165            Self::MarketWatch => {
166                "https://feeds.content.dowjones.io/public/rss/mw_topstories".to_string()
167            }
168            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html".to_string(),
169            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss".to_string(),
170            Self::FinancialTimes => "https://www.ft.com/markets?format=rss".to_string(),
171            Self::NytBusiness => {
172                "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml".to_string()
173            }
174            Self::GuardianBusiness => "https://www.theguardian.com/business/rss".to_string(),
175            Self::Investing => "https://www.investing.com/rss/news.rss".to_string(),
176            Self::Bea => "https://apps.bea.gov/rss/rss.xml".to_string(),
177            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html".to_string(),
178            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/".to_string(),
179            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml".to_string(),
180            Self::Fortune => "https://fortune.com/feed".to_string(),
181            Self::BusinessWire => {
182                "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw==".to_string()
183            }
184            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/".to_string(),
185            Self::CoinTelegraph => "https://cointelegraph.com/rss".to_string(),
186            Self::TechCrunch => "https://techcrunch.com/feed/".to_string(),
187            Self::HackerNews => "https://hnrss.org/newest?points=100".to_string(),
188            Self::OilPrice => "https://oilprice.com/rss/main".to_string(),
189            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed".to_string(),
190            Self::Scmp => "https://www.scmp.com/rss/91/feed".to_string(),
191            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar".to_string(),
192            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news".to_string(),
193            Self::VentureBeat => "https://venturebeat.com/feed/".to_string(),
194            Self::YCombinator => "https://blog.ycombinator.com/feed/".to_string(),
195            Self::TheEconomist => {
196                "https://www.economist.com/sections/economics/rss.xml".to_string()
197            }
198            Self::FinancialPost => "https://financialpost.com/feed".to_string(),
199            Self::FtLex => "https://www.ft.com/lex?format=rss".to_string(),
200            Self::RitholtzBigPicture => "https://ritholtz.com/feed/".to_string(),
201            Self::Custom(url) => url.clone(),
202        }
203    }
204
205    /// Human-readable source name, used in [`FeedEntry::source`].
206    pub fn name(&self) -> String {
207        match self {
208            Self::FederalReserve => "Federal Reserve".to_string(),
209            Self::SecPressReleases => "SEC".to_string(),
210            Self::SecFilings(form) => format!("SEC EDGAR ({form})"),
211            Self::MarketWatch => "MarketWatch".to_string(),
212            Self::Cnbc => "CNBC".to_string(),
213            Self::Bloomberg => "Bloomberg".to_string(),
214            Self::FinancialTimes => "Financial Times".to_string(),
215            Self::NytBusiness => "New York Times".to_string(),
216            Self::GuardianBusiness => "The Guardian".to_string(),
217            Self::Investing => "Investing.com".to_string(),
218            Self::Bea => "Bureau of Economic Analysis".to_string(),
219            Self::Ecb => "European Central Bank".to_string(),
220            Self::Cfpb => "CFPB".to_string(),
221            Self::WsjMarkets => "Wall Street Journal".to_string(),
222            Self::Fortune => "Fortune".to_string(),
223            Self::BusinessWire => "Business Wire".to_string(),
224            Self::CoinDesk => "CoinDesk".to_string(),
225            Self::CoinTelegraph => "CoinTelegraph".to_string(),
226            Self::TechCrunch => "TechCrunch".to_string(),
227            Self::HackerNews => "Hacker News".to_string(),
228            Self::OilPrice => "OilPrice.com".to_string(),
229            Self::CalculatedRisk => "Calculated Risk".to_string(),
230            Self::Scmp => "South China Morning Post".to_string(),
231            Self::NikkeiAsia => "Nikkei Asia".to_string(),
232            Self::BankOfEngland => "Bank of England".to_string(),
233            Self::VentureBeat => "VentureBeat".to_string(),
234            Self::YCombinator => "Y Combinator".to_string(),
235            Self::TheEconomist => "The Economist".to_string(),
236            Self::FinancialPost => "Financial Post".to_string(),
237            Self::FtLex => "Financial Times Lex".to_string(),
238            Self::RitholtzBigPicture => "The Big Picture".to_string(),
239            Self::Custom(url) => url.clone(),
240        }
241    }
242}
243
244/// A single entry from an RSS/Atom feed.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246#[non_exhaustive]
247pub struct FeedEntry {
248    /// Article or item title
249    pub title: String,
250    /// Canonical link to the article
251    pub url: String,
252    /// Publication date/time as an RFC 3339 string (if available)
253    pub published: Option<String>,
254    /// Short summary or description
255    pub summary: Option<String>,
256    /// Name of the feed source
257    pub source: String,
258}
259
260/// Fetch and parse a single feed source.
261///
262/// Returns an empty `Vec` (not an error) when the feed is reachable but empty.
263pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
264    let client = build_feed_client();
265    fetch_with_client(&client, &source.url(), &source.name()).await
266}
267
268/// Fetch multiple feed sources concurrently and merge the results.
269///
270/// Results are deduplicated by URL and sorted newest-first when dates are available.
271/// Feeds that fail individually are skipped (not propagated as errors).
272///
273/// A single `reqwest::Client` is shared across all concurrent fetches within
274/// this call, reusing connection pools and TLS state.
275pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
276    let client = build_feed_client();
277    let pairs: Vec<(String, String)> = sources.iter().map(|s| (s.url(), s.name())).collect();
278    let futures: Vec<_> = pairs
279        .iter()
280        .map(|(url, name)| fetch_with_client(&client, url, name))
281        .collect();
282
283    let results = join_all(futures).await;
284
285    let mut seen_urls: HashSet<String> = HashSet::new();
286    let mut entries: Vec<FeedEntry> = results
287        .into_iter()
288        .flat_map(|r| r.unwrap_or_default())
289        .filter(|e| seen_urls.insert(e.url.clone()))
290        .collect();
291
292    // Sort newest-first where dates are present
293    entries.sort_by(|a, b| b.published.cmp(&a.published));
294
295    Ok(entries)
296}
297
298async fn fetch_with_client(
299    client: &reqwest::Client,
300    url: &str,
301    source_name: &str,
302) -> Result<Vec<FeedEntry>> {
303    let source = source_name.to_string();
304
305    let text = client.get(url).send().await?.text().await?;
306
307    let feed = parser::parse(text.as_bytes()).map_err(|e| FinanceError::FeedParseError {
308        url: url.to_string(),
309        context: e.to_string(),
310    })?;
311
312    let entries = feed
313        .entries
314        .into_iter()
315        .filter_map(|entry| {
316            let title = entry.title.map(|t| t.content)?.trim().to_string();
317            if title.is_empty() {
318                return None;
319            }
320
321            let url_str = entry
322                .links
323                .into_iter()
324                .next()
325                .map(|l| l.href)
326                .unwrap_or_default();
327
328            if url_str.is_empty() {
329                return None;
330            }
331
332            let published = entry.published.or(entry.updated).map(|dt| dt.to_rfc3339());
333
334            let summary = entry
335                .summary
336                .map(|s| s.content)
337                .or_else(|| entry.content.and_then(|c| c.body));
338
339            Some(FeedEntry {
340                title,
341                url: url_str,
342                published,
343                summary,
344                source: source.clone(),
345            })
346        })
347        .collect();
348
349    Ok(entries)
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetch `source` over the network, asserting that the call succeeds and
    /// yields at least one entry; the entries are returned for further checks.
    async fn fetch_non_empty(source: FeedSource) -> Vec<FeedEntry> {
        let entries = fetch(source).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
        entries
    }

    #[test]
    fn test_feed_source_urls() {
        // Built-in sources that must resolve to an https URL.
        let https_sources = [
            FeedSource::FederalReserve,
            FeedSource::SecPressReleases,
            FeedSource::Bloomberg,
            FeedSource::FinancialTimes,
            FeedSource::NytBusiness,
            FeedSource::GuardianBusiness,
            FeedSource::Investing,
            FeedSource::Bea,
            FeedSource::Ecb,
            FeedSource::Cfpb,
        ];
        for source in https_sources {
            assert!(source.url().starts_with("https://"));
        }

        // Data-carrying variants.
        assert!(
            FeedSource::SecFilings("10-K".to_string())
                .url()
                .contains("10-K")
        );
        assert_eq!(
            FeedSource::Custom("https://example.com/feed.rss".to_string()).url(),
            "https://example.com/feed.rss"
        );

        // New sources: each URL must point at the expected host.
        let expected_hosts = [
            (FeedSource::WsjMarkets, "dj.com"),
            (FeedSource::Fortune, "fortune.com"),
            (FeedSource::BusinessWire, "businesswire.com"),
            (FeedSource::CoinDesk, "coindesk.com"),
            (FeedSource::CoinTelegraph, "cointelegraph.com"),
            (FeedSource::TechCrunch, "techcrunch.com"),
            (FeedSource::HackerNews, "hnrss.org"),
        ];
        for (source, host) in expected_hosts {
            assert!(source.url().contains(host));
        }
    }

    #[test]
    fn test_feed_source_names() {
        let expected_names = [
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            // New sources
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
        ];
        for (source, expected) in expected_names {
            assert_eq!(source.name(), expected);
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let entries = fetch_non_empty(FeedSource::FederalReserve).await;
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let entries = fetch_non_empty(FeedSource::Bloomberg).await;
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
            assert_eq!(e.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        fetch_non_empty(FeedSource::FinancialTimes).await;
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let entries = fetch_non_empty(FeedSource::Bea).await;
        assert_eq!(entries[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        fetch_non_empty(FeedSource::Ecb).await;
    }
}