finance-query 2.5.0

A Rust library for querying financial data
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
//! RSS/Atom news feed aggregation.
//!
//! Requires the **`rss`** feature flag.
//!
//! Fetches and parses RSS/Atom feeds from named financial sources or arbitrary URLs.
//! Multiple feeds can be fetched and merged in one call with automatic deduplication.
//!
//! # Quick Start
//!
//! ```no_run
//! use finance_query::feeds::{self, FeedSource};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Fetch Federal Reserve press releases
//! let fed_news = feeds::fetch(FeedSource::FederalReserve).await?;
//! for entry in fed_news.iter().take(5) {
//!     println!("{}: {}", entry.published.as_deref().unwrap_or("?"), entry.title);
//! }
//!
//! // Aggregate multiple sources
//! let news = feeds::fetch_all(&[
//!     FeedSource::FederalReserve,
//!     FeedSource::SecPressReleases,
//!     FeedSource::MarketWatch,
//! ]).await?;
//! println!("Total entries: {}", news.len());
//! # Ok(())
//! # }
//! ```

use feed_rs::parser;
use futures::future::join_all;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::sync::OnceLock;
use std::time::Duration;

use crate::error::{FinanceError, Result};

/// Cached User-Agent string, computed once from the environment.
///
/// Only the configuration (UA string) is stored as a singleton — not the
/// `reqwest::Client` itself. `reqwest::Client` internally spawns hyper
/// connection-pool tasks on whichever tokio runtime first uses it; when that
/// runtime is dropped (e.g. after a `#[tokio::test]`), those tasks die and
/// subsequent calls from a different runtime receive `DispatchGone`. Caching
/// only the UA avoids this while still computing the environment lookup once.
static FEED_UA: OnceLock<String> = OnceLock::new();

/// Timeout, in seconds, passed to the `reqwest` client builder's `timeout`
/// setting for every HTTP client created by this module.
const FEED_TIMEOUT_SECONDS: u64 = 30;

/// Return the User-Agent string sent with every feed request.
///
/// The string is built on the first call and cached in `FEED_UA`; later calls
/// return the cached value without consulting the environment again.
fn feed_user_agent() -> &'static str {
    FEED_UA.get_or_init(|| {
        // SEC EDGAR only accepts a UA of the form "app/version (email)", while
        // other sites take any reasonable UA. When EDGAR_EMAIL (the same env
        // var the edgar module reads) holds a non-blank value we use the email
        // form; otherwise we fall back to pointing at the project's GitHub URL.
        let configured = std::env::var("EDGAR_EMAIL").ok();
        let contact = configured
            .as_deref()
            .map(str::trim)
            .filter(|address| !address.is_empty());

        if let Some(address) = contact {
            format!("finance-query/{} ({})", env!("CARGO_PKG_VERSION"), address)
        } else {
            String::from(concat!(
                "finance-query/",
                env!("CARGO_PKG_VERSION"),
                " (+https://github.com/Verdenroz/finance-query)"
            ))
        }
    })
}

/// Build a new `reqwest::Client` configured for feed requests.
///
/// The client carries the module's User-Agent and the
/// `FEED_TIMEOUT_SECONDS` timeout.
///
/// # Panics
///
/// Panics if the `reqwest` client builder reports an error.
fn build_feed_client() -> reqwest::Client {
    let request_timeout = Duration::from_secs(FEED_TIMEOUT_SECONDS);
    let builder = reqwest::Client::builder()
        .user_agent(feed_user_agent())
        .timeout(request_timeout);

    builder.build().expect("failed to build feeds HTTP client")
}

/// A named or custom RSS/Atom feed source.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FeedSource {
    /// Federal Reserve press releases and speeches
    FederalReserve,
    /// SEC press releases (enforcement actions, rule changes)
    SecPressReleases,
    /// SEC EDGAR filing feed — specify form type (e.g., `"10-K"`, `"8-K"`)
    SecFilings(String),
    /// MarketWatch top stories
    MarketWatch,
    /// CNBC Markets
    Cnbc,
    /// Bloomberg Markets news
    Bloomberg,
    /// Financial Times Markets section
    FinancialTimes,
    /// The New York Times Business section
    NytBusiness,
    /// The Guardian Business section
    GuardianBusiness,
    /// Investing.com all news
    Investing,
    /// U.S. Bureau of Economic Analysis data releases
    Bea,
    /// European Central Bank press releases and speeches
    Ecb,
    /// Consumer Financial Protection Bureau newsroom
    Cfpb,
    /// Wall Street Journal Markets top stories
    WsjMarkets,
    /// Fortune — business and finance news
    Fortune,
    /// Business Wire — official corporate press releases (earnings, dividends, M&A)
    BusinessWire,
    /// CoinDesk — cryptocurrency and blockchain news
    CoinDesk,
    /// CoinTelegraph — cryptocurrency news and analysis
    CoinTelegraph,
    /// TechCrunch — startup, VC, and tech industry news
    TechCrunch,
    /// Hacker News — community-curated tech posts with 100+ points
    HackerNews,
    /// OilPrice.com — crude oil, natural gas, and energy geopolitics
    OilPrice,
    /// Calculated Risk — housing starts, mortgage rates, and macro data
    CalculatedRisk,
    /// South China Morning Post — China business, regulation, and trade
    Scmp,
    /// Nikkei Asia — Japanese and Asian business news
    NikkeiAsia,
    /// Bank of England — UK monetary policy, rate decisions, and regulatory notices
    BankOfEngland,
    /// VentureBeat — AI funding rounds and enterprise technology
    VentureBeat,
    /// Y Combinator Blog — startup ecosystem announcements (low-frequency)
    YCombinator,
    /// The Economist — global economics and market analysis
    TheEconomist,
    /// Financial Post — Canadian market and business news
    FinancialPost,
    /// Financial Times Lex — short daily market commentary column
    FtLex,
    /// The Big Picture (Ritholtz) — macro finance analysis and commentary
    RitholtzBigPicture,
    /// Custom feed URL
    Custom(String),
}

impl FeedSource {
    /// Return the URL for this feed source.
    pub fn url(&self) -> String {
        match self {
            Self::FederalReserve => {
                "https://www.federalreserve.gov/feeds/press_all.xml".to_string()
            }
            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss".to_string(),
            Self::SecFilings(form_type) => format!(
                "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
            ),
            Self::MarketWatch => {
                "https://feeds.content.dowjones.io/public/rss/mw_topstories".to_string()
            }
            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html".to_string(),
            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss".to_string(),
            Self::FinancialTimes => "https://www.ft.com/markets?format=rss".to_string(),
            Self::NytBusiness => {
                "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml".to_string()
            }
            Self::GuardianBusiness => "https://www.theguardian.com/business/rss".to_string(),
            Self::Investing => "https://www.investing.com/rss/news.rss".to_string(),
            Self::Bea => "https://apps.bea.gov/rss/rss.xml".to_string(),
            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html".to_string(),
            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/".to_string(),
            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml".to_string(),
            Self::Fortune => "https://fortune.com/feed".to_string(),
            Self::BusinessWire => {
                "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw==".to_string()
            }
            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/".to_string(),
            Self::CoinTelegraph => "https://cointelegraph.com/rss".to_string(),
            Self::TechCrunch => "https://techcrunch.com/feed/".to_string(),
            Self::HackerNews => "https://hnrss.org/newest?points=100".to_string(),
            Self::OilPrice => "https://oilprice.com/rss/main".to_string(),
            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed".to_string(),
            Self::Scmp => "https://www.scmp.com/rss/91/feed".to_string(),
            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar".to_string(),
            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news".to_string(),
            Self::VentureBeat => "https://venturebeat.com/feed/".to_string(),
            Self::YCombinator => "https://blog.ycombinator.com/feed/".to_string(),
            Self::TheEconomist => {
                "https://www.economist.com/sections/economics/rss.xml".to_string()
            }
            Self::FinancialPost => "https://financialpost.com/feed".to_string(),
            Self::FtLex => "https://www.ft.com/lex?format=rss".to_string(),
            Self::RitholtzBigPicture => "https://ritholtz.com/feed/".to_string(),
            Self::Custom(url) => url.clone(),
        }
    }

    /// Human-readable source name, used in [`FeedEntry::source`].
    pub fn name(&self) -> String {
        match self {
            Self::FederalReserve => "Federal Reserve".to_string(),
            Self::SecPressReleases => "SEC".to_string(),
            Self::SecFilings(form) => format!("SEC EDGAR ({form})"),
            Self::MarketWatch => "MarketWatch".to_string(),
            Self::Cnbc => "CNBC".to_string(),
            Self::Bloomberg => "Bloomberg".to_string(),
            Self::FinancialTimes => "Financial Times".to_string(),
            Self::NytBusiness => "New York Times".to_string(),
            Self::GuardianBusiness => "The Guardian".to_string(),
            Self::Investing => "Investing.com".to_string(),
            Self::Bea => "Bureau of Economic Analysis".to_string(),
            Self::Ecb => "European Central Bank".to_string(),
            Self::Cfpb => "CFPB".to_string(),
            Self::WsjMarkets => "Wall Street Journal".to_string(),
            Self::Fortune => "Fortune".to_string(),
            Self::BusinessWire => "Business Wire".to_string(),
            Self::CoinDesk => "CoinDesk".to_string(),
            Self::CoinTelegraph => "CoinTelegraph".to_string(),
            Self::TechCrunch => "TechCrunch".to_string(),
            Self::HackerNews => "Hacker News".to_string(),
            Self::OilPrice => "OilPrice.com".to_string(),
            Self::CalculatedRisk => "Calculated Risk".to_string(),
            Self::Scmp => "South China Morning Post".to_string(),
            Self::NikkeiAsia => "Nikkei Asia".to_string(),
            Self::BankOfEngland => "Bank of England".to_string(),
            Self::VentureBeat => "VentureBeat".to_string(),
            Self::YCombinator => "Y Combinator".to_string(),
            Self::TheEconomist => "The Economist".to_string(),
            Self::FinancialPost => "Financial Post".to_string(),
            Self::FtLex => "Financial Times Lex".to_string(),
            Self::RitholtzBigPicture => "The Big Picture".to_string(),
            Self::Custom(url) => url.clone(),
        }
    }
}

/// A single entry from an RSS/Atom feed.
///
/// Produced by [`fetch`] and [`fetch_all`]. Feed items whose title is missing
/// or blank, or that have no link, are dropped while parsing and never appear
/// as a `FeedEntry`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct FeedEntry {
    /// Article or item title, with surrounding whitespace trimmed
    pub title: String,
    /// Link to the article (the first link listed on the feed item)
    pub url: String,
    /// Publication date/time as an RFC 3339 string (if available); falls back
    /// to the item's "updated" timestamp when no publication date is present
    pub published: Option<String>,
    /// Short summary or description; falls back to the item's content body
    /// when the feed provides no summary
    pub summary: Option<String>,
    /// Name of the feed source (see [`FeedSource::name`])
    pub source: String,
}

/// Download one feed source and convert its items into [`FeedEntry`] values.
///
/// A feed that parses successfully but yields no usable items produces an
/// empty `Vec`, not an error.
///
/// # Errors
///
/// Returns an error when the HTTP request fails or the response body cannot
/// be parsed as a feed.
pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
    let http = build_feed_client();
    let feed_url = source.url();
    let feed_name = source.name();

    fetch_with_client(&http, &feed_url, &feed_name).await
}

/// Fetch several feed sources concurrently and merge them into one list.
///
/// The merged list contains each article URL at most once (the first
/// occurrence wins) and is ordered newest-first by `published`; entries
/// without a date sort after all dated entries. A feed that fails to download
/// or parse is skipped rather than reported, so this function returns `Ok`
/// even when every feed fails (with an empty list in that case).
///
/// All requests made by one call share a single `reqwest::Client`, so
/// connection pools and TLS state are reused across the concurrent fetches.
pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
    let http = build_feed_client();

    // Materialise the owned (url, name) pairs first so the futures below can
    // borrow from them for as long as they run.
    let targets: Vec<(String, String)> = sources
        .iter()
        .map(|source| (source.url(), source.name()))
        .collect();

    let outcomes = join_all(
        targets
            .iter()
            .map(|(feed_url, feed_name)| fetch_with_client(&http, feed_url, feed_name)),
    )
    .await;

    let mut known_urls: HashSet<String> = HashSet::new();
    let mut merged: Vec<FeedEntry> = Vec::new();
    for outcome in outcomes {
        // A feed that errored contributes nothing to the merged list.
        let Ok(batch) = outcome else { continue };
        for item in batch {
            if known_urls.insert(item.url.clone()) {
                merged.push(item);
            }
        }
    }

    // Newest first; `None` compares below every `Some`, so undated entries
    // end up last after the reversal.
    merged.sort_by(|left, right| left.published.cmp(&right.published).reverse());

    Ok(merged)
}

/// Download `url` with `client`, parse the body as an RSS/Atom feed, and
/// convert its items into [`FeedEntry`] values labelled with `source_name`.
///
/// Items are skipped when their title is missing or blank after trimming, or
/// when they have no link (or an empty one). The entry URL is the first link
/// listed on the item; `published` falls back to the item's "updated"
/// timestamp, and `summary` falls back to the item's content body.
///
/// # Errors
///
/// Returns an error when the request cannot be sent, when the server answers
/// with a 4xx/5xx status, when the body cannot be read, or — as
/// [`FinanceError::FeedParseError`] — when the body is not a parseable feed.
async fn fetch_with_client(
    client: &reqwest::Client,
    url: &str,
    source_name: &str,
) -> Result<Vec<FeedEntry>> {
    // Reject HTTP error statuses up front: otherwise an HTML error page would
    // be handed to the feed parser and reported as a misleading parse error.
    let text = client
        .get(url)
        .send()
        .await?
        .error_for_status()?
        .text()
        .await?;

    let feed = parser::parse(text.as_bytes()).map_err(|e| FinanceError::FeedParseError {
        url: url.to_string(),
        context: e.to_string(),
    })?;

    let entries = feed
        .entries
        .into_iter()
        .filter_map(|entry| {
            // An item without a usable title is not worth surfacing.
            let title = entry.title.map(|t| t.content)?.trim().to_string();
            if title.is_empty() {
                return None;
            }

            // The first link on the item is treated as the article URL.
            let url_str = entry
                .links
                .into_iter()
                .next()
                .map(|l| l.href)
                .unwrap_or_default();

            if url_str.is_empty() {
                return None;
            }

            let published = entry.published.or(entry.updated).map(|dt| dt.to_rfc3339());

            let summary = entry
                .summary
                .map(|s| s.content)
                .or_else(|| entry.content.and_then(|c| c.body));

            Some(FeedEntry {
                title,
                url: url_str,
                published,
                summary,
                source: source_name.to_string(),
            })
        })
        .collect();

    Ok(entries)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every payload-free [`FeedSource`] variant paired with its expected
    /// display name, so the offline tests cover the whole built-in catalogue.
    fn fixed_sources() -> Vec<(FeedSource, &'static str)> {
        vec![
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::SecPressReleases, "SEC"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Cnbc, "CNBC"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
            (FeedSource::OilPrice, "OilPrice.com"),
            (FeedSource::CalculatedRisk, "Calculated Risk"),
            (FeedSource::Scmp, "South China Morning Post"),
            (FeedSource::NikkeiAsia, "Nikkei Asia"),
            (FeedSource::BankOfEngland, "Bank of England"),
            (FeedSource::VentureBeat, "VentureBeat"),
            (FeedSource::YCombinator, "Y Combinator"),
            (FeedSource::TheEconomist, "The Economist"),
            (FeedSource::FinancialPost, "Financial Post"),
            (FeedSource::FtLex, "Financial Times Lex"),
            (FeedSource::RitholtzBigPicture, "The Big Picture"),
        ]
    }

    #[test]
    fn test_feed_source_urls() {
        assert!(FeedSource::FederalReserve.url().starts_with("https://"));
        assert!(FeedSource::SecPressReleases.url().starts_with("https://"));
        assert!(
            FeedSource::SecFilings("10-K".to_string())
                .url()
                .contains("10-K")
        );
        assert_eq!(
            FeedSource::Custom("https://example.com/feed.rss".to_string()).url(),
            "https://example.com/feed.rss"
        );
        assert!(FeedSource::Bloomberg.url().starts_with("https://"));
        assert!(FeedSource::FinancialTimes.url().starts_with("https://"));
        assert!(FeedSource::NytBusiness.url().starts_with("https://"));
        assert!(FeedSource::GuardianBusiness.url().starts_with("https://"));
        assert!(FeedSource::Investing.url().starts_with("https://"));
        assert!(FeedSource::Bea.url().starts_with("https://"));
        assert!(FeedSource::Ecb.url().starts_with("https://"));
        assert!(FeedSource::Cfpb.url().starts_with("https://"));
        // New sources
        assert!(FeedSource::WsjMarkets.url().contains("dj.com"));
        assert!(FeedSource::Fortune.url().contains("fortune.com"));
        assert!(FeedSource::BusinessWire.url().contains("businesswire.com"));
        assert!(FeedSource::CoinDesk.url().contains("coindesk.com"));
        assert!(
            FeedSource::CoinTelegraph
                .url()
                .contains("cointelegraph.com")
        );
        assert!(FeedSource::TechCrunch.url().contains("techcrunch.com"));
        assert!(FeedSource::HackerNews.url().contains("hnrss.org"));
    }

    /// Every built-in source must point at an HTTPS URL, and no two built-in
    /// sources may share the same URL (which would indicate a copy/paste slip).
    #[test]
    fn test_all_fixed_sources_have_distinct_https_urls() {
        let sources = fixed_sources();
        let mut seen = std::collections::HashSet::new();
        for (source, _) in &sources {
            let url = source.url();
            assert!(url.starts_with("https://"), "{source:?} -> {url}");
            assert!(seen.insert(url.clone()), "duplicate URL {url} for {source:?}");
        }
        assert_eq!(seen.len(), sources.len());
    }

    #[test]
    fn test_feed_source_names() {
        assert_eq!(FeedSource::FederalReserve.name(), "Federal Reserve");
        assert_eq!(FeedSource::MarketWatch.name(), "MarketWatch");
        assert_eq!(FeedSource::Bloomberg.name(), "Bloomberg");
        assert_eq!(FeedSource::FinancialTimes.name(), "Financial Times");
        assert_eq!(FeedSource::NytBusiness.name(), "New York Times");
        assert_eq!(FeedSource::GuardianBusiness.name(), "The Guardian");
        assert_eq!(FeedSource::Investing.name(), "Investing.com");
        assert_eq!(FeedSource::Bea.name(), "Bureau of Economic Analysis");
        assert_eq!(FeedSource::Ecb.name(), "European Central Bank");
        assert_eq!(FeedSource::Cfpb.name(), "CFPB");
        // New sources
        assert_eq!(FeedSource::WsjMarkets.name(), "Wall Street Journal");
        assert_eq!(FeedSource::Fortune.name(), "Fortune");
        assert_eq!(FeedSource::BusinessWire.name(), "Business Wire");
        assert_eq!(FeedSource::CoinDesk.name(), "CoinDesk");
        assert_eq!(FeedSource::CoinTelegraph.name(), "CoinTelegraph");
        assert_eq!(FeedSource::TechCrunch.name(), "TechCrunch");
        assert_eq!(FeedSource::HackerNews.name(), "Hacker News");
    }

    /// Table-driven check of the display name of every built-in source.
    #[test]
    fn test_all_fixed_source_names() {
        for (source, expected) in fixed_sources() {
            assert_eq!(source.name(), expected, "{source:?}");
        }
    }

    /// The two payload-carrying variants derive their name from the payload.
    #[test]
    fn test_payload_source_names() {
        assert_eq!(
            FeedSource::SecFilings("10-K".to_string()).name(),
            "SEC EDGAR (10-K)"
        );
        assert_eq!(
            FeedSource::Custom("https://example.com/feed.rss".to_string()).name(),
            "https://example.com/feed.rss"
        );
    }

    // The tests below hit live feeds and are therefore ignored by default;
    // run them explicitly with `cargo test -- --ignored`.

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let entries = fetch(FeedSource::FederalReserve).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let entries = fetch(FeedSource::Bloomberg).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
        for e in entries.iter().take(3) {
            assert!(!e.title.is_empty());
            assert!(!e.url.is_empty());
            assert_eq!(e.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        let entries = fetch(FeedSource::FinancialTimes).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let entries = fetch(FeedSource::Bea).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
        assert_eq!(entries[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        let entries = fetch(FeedSource::Ecb).await;
        assert!(entries.is_ok(), "Expected ok, got: {:?}", entries.err());
        let entries = entries.unwrap();
        assert!(!entries.is_empty());
    }
}