use feed_rs::parser;
use futures::future::join_all;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::sync::OnceLock;
use std::time::Duration;
use crate::error::{FinanceError, Result};
/// Lazily-initialized User-Agent string shared by every feed request
/// (built once by `feed_user_agent`).
static FEED_UA: OnceLock<String> = OnceLock::new();
/// Per-request timeout (seconds) applied to the feeds HTTP client.
const FEED_TIMEOUT_SECONDS: u64 = 30;
/// Lazily builds the User-Agent sent with feed requests.
///
/// When the `EDGAR_EMAIL` environment variable holds a non-blank value it is
/// included as the contact field (SEC EDGAR asks automated clients to
/// identify themselves); otherwise the project's repository URL is used.
fn feed_user_agent() -> &'static str {
    FEED_UA.get_or_init(|| {
        // Treat an unset, empty, or whitespace-only variable the same way.
        let contact = std::env::var("EDGAR_EMAIL")
            .ok()
            .map(|email| email.trim().to_string())
            .filter(|email| !email.is_empty());
        match contact {
            Some(email) => format!("finance-query/{} ({})", env!("CARGO_PKG_VERSION"), email),
            None => concat!(
                "finance-query/",
                env!("CARGO_PKG_VERSION"),
                " (+https://github.com/Verdenroz/finance-query)"
            )
            .to_string(),
        }
    })
}
fn build_feed_client() -> reqwest::Client {
reqwest::Client::builder()
.user_agent(feed_user_agent())
.timeout(Duration::from_secs(FEED_TIMEOUT_SECONDS))
.build()
.expect("failed to build feeds HTTP client")
}
/// A news/press-release feed the fetcher knows how to reach.
///
/// Most variants map to a fixed RSS/Atom URL; `SecFilings` and `Custom`
/// carry runtime data (an EDGAR form type / an arbitrary feed URL).
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FeedSource {
    FederalReserve,
    SecPressReleases,
    SecFilings(String),
    MarketWatch,
    Cnbc,
    Bloomberg,
    FinancialTimes,
    NytBusiness,
    GuardianBusiness,
    Investing,
    Bea,
    Ecb,
    Cfpb,
    WsjMarkets,
    Fortune,
    BusinessWire,
    CoinDesk,
    CoinTelegraph,
    TechCrunch,
    HackerNews,
    OilPrice,
    CalculatedRisk,
    Scmp,
    NikkeiAsia,
    BankOfEngland,
    VentureBeat,
    YCombinator,
    TheEconomist,
    FinancialPost,
    FtLex,
    RitholtzBigPicture,
    Custom(String),
}

impl FeedSource {
    /// Returns the feed's RSS/Atom URL.
    pub fn url(&self) -> String {
        // Variants carrying runtime data are handled up front with early
        // returns; every other variant maps to a fixed URL which is
        // converted to an owned string exactly once, below.
        let fixed = match self {
            Self::SecFilings(form_type) => {
                return format!(
                    "https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={form_type}&output=atom"
                );
            }
            Self::Custom(url) => return url.clone(),
            Self::FederalReserve => "https://www.federalreserve.gov/feeds/press_all.xml",
            Self::SecPressReleases => "https://www.sec.gov/news/pressreleases.rss",
            Self::MarketWatch => "https://feeds.content.dowjones.io/public/rss/mw_topstories",
            Self::Cnbc => "https://www.cnbc.com/id/100003114/device/rss/rss.html",
            Self::Bloomberg => "https://feeds.bloomberg.com/markets/news.rss",
            Self::FinancialTimes => "https://www.ft.com/markets?format=rss",
            Self::NytBusiness => "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
            Self::GuardianBusiness => "https://www.theguardian.com/business/rss",
            Self::Investing => "https://www.investing.com/rss/news.rss",
            Self::Bea => "https://apps.bea.gov/rss/rss.xml",
            Self::Ecb => "https://www.ecb.europa.eu/rss/press.html",
            Self::Cfpb => "https://www.consumerfinance.gov/about-us/newsroom/feed/",
            Self::WsjMarkets => "https://feeds.a.dj.com/rss/RSSMarketsMain.xml",
            Self::Fortune => "https://fortune.com/feed",
            Self::BusinessWire => "https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVtQXw==",
            Self::CoinDesk => "https://www.coindesk.com/arc/outboundfeeds/rss/",
            Self::CoinTelegraph => "https://cointelegraph.com/rss",
            Self::TechCrunch => "https://techcrunch.com/feed/",
            Self::HackerNews => "https://hnrss.org/newest?points=100",
            Self::OilPrice => "https://oilprice.com/rss/main",
            Self::CalculatedRisk => "https://calculatedrisk.substack.com/feed",
            Self::Scmp => "https://www.scmp.com/rss/91/feed",
            Self::NikkeiAsia => "https://asia.nikkei.com/rss/feed/nar",
            Self::BankOfEngland => "https://www.bankofengland.co.uk/rss/news",
            Self::VentureBeat => "https://venturebeat.com/feed/",
            Self::YCombinator => "https://blog.ycombinator.com/feed/",
            Self::TheEconomist => "https://www.economist.com/sections/economics/rss.xml",
            Self::FinancialPost => "https://financialpost.com/feed",
            Self::FtLex => "https://www.ft.com/lex?format=rss",
            Self::RitholtzBigPicture => "https://ritholtz.com/feed/",
        };
        fixed.to_string()
    }

    /// Returns the human-readable name for the feed; this string becomes
    /// `FeedEntry::source` on fetched entries.
    pub fn name(&self) -> String {
        let fixed = match self {
            Self::SecFilings(form) => return format!("SEC EDGAR ({form})"),
            // A custom feed has no curated display name, so its URL is used.
            Self::Custom(url) => return url.clone(),
            Self::FederalReserve => "Federal Reserve",
            Self::SecPressReleases => "SEC",
            Self::MarketWatch => "MarketWatch",
            Self::Cnbc => "CNBC",
            Self::Bloomberg => "Bloomberg",
            Self::FinancialTimes => "Financial Times",
            Self::NytBusiness => "New York Times",
            Self::GuardianBusiness => "The Guardian",
            Self::Investing => "Investing.com",
            Self::Bea => "Bureau of Economic Analysis",
            Self::Ecb => "European Central Bank",
            Self::Cfpb => "CFPB",
            Self::WsjMarkets => "Wall Street Journal",
            Self::Fortune => "Fortune",
            Self::BusinessWire => "Business Wire",
            Self::CoinDesk => "CoinDesk",
            Self::CoinTelegraph => "CoinTelegraph",
            Self::TechCrunch => "TechCrunch",
            Self::HackerNews => "Hacker News",
            Self::OilPrice => "OilPrice.com",
            Self::CalculatedRisk => "Calculated Risk",
            Self::Scmp => "South China Morning Post",
            Self::NikkeiAsia => "Nikkei Asia",
            Self::BankOfEngland => "Bank of England",
            Self::VentureBeat => "VentureBeat",
            Self::YCombinator => "Y Combinator",
            Self::TheEconomist => "The Economist",
            Self::FinancialPost => "Financial Post",
            Self::FtLex => "Financial Times Lex",
            Self::RitholtzBigPicture => "The Big Picture",
        };
        fixed.to_string()
    }
}
/// A single normalized item pulled from an RSS/Atom feed, as produced by
/// `fetch`/`fetch_all`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct FeedEntry {
    /// Headline text; trimmed and non-empty when produced by this module.
    pub title: String,
    /// Link to the article (the entry's first link); non-empty when produced
    /// by this module.
    pub url: String,
    /// RFC 3339 publication timestamp, falling back to the feed's
    /// last-updated timestamp; `None` when the feed provides neither.
    pub published: Option<String>,
    /// Entry summary, falling back to the full content body when the feed
    /// has no summary.
    pub summary: Option<String>,
    /// Human-readable name of the originating feed (see `FeedSource::name`).
    pub source: String,
}
/// Fetches and parses a single feed, returning its normalized entries.
///
/// # Errors
/// Propagates HTTP failures and feed-parse failures for this one source.
pub async fn fetch(source: FeedSource) -> Result<Vec<FeedEntry>> {
    let url = source.url();
    let label = source.name();
    let client = build_feed_client();
    fetch_with_client(&client, &url, &label).await
}
/// Fetches every source concurrently and merges the results.
///
/// Best-effort: sources that fail to download or parse are silently dropped
/// rather than failing the whole batch. Entries are deduplicated by URL
/// (first occurrence wins) and sorted newest-first.
///
/// NOTE(review): the sort compares RFC 3339 strings lexicographically, which
/// is only chronologically correct when all timestamps share a UTC offset —
/// acceptable for a headline feed, but worth confirming.
pub async fn fetch_all(sources: &[FeedSource]) -> Result<Vec<FeedEntry>> {
    let client = build_feed_client();
    // Own the url/name strings so the borrowed futures below can reference them.
    let targets: Vec<(String, String)> = sources.iter().map(|s| (s.url(), s.name())).collect();
    let results = join_all(
        targets
            .iter()
            .map(|(url, name)| fetch_with_client(&client, url, name)),
    )
    .await;
    let mut seen_urls: HashSet<String> = HashSet::new();
    let mut merged: Vec<FeedEntry> = Vec::new();
    for outcome in results {
        // A failed feed contributes nothing; successful feeds are merged
        // in source order, keeping only the first entry per URL.
        for entry in outcome.unwrap_or_default() {
            if seen_urls.insert(entry.url.clone()) {
                merged.push(entry);
            }
        }
    }
    // Descending by timestamp; entries without one (None) sort last.
    merged.sort_by(|a, b| b.published.cmp(&a.published));
    Ok(merged)
}
/// Downloads `url` with `client`, parses the body as RSS/Atom, and
/// normalizes each entry into a `FeedEntry` tagged with `source_name`.
///
/// Entries without a non-blank title or without a link are skipped.
///
/// # Errors
/// Returns the underlying HTTP error, or `FinanceError::FeedParseError`
/// when the response body is not a parseable feed.
async fn fetch_with_client(
    client: &reqwest::Client,
    url: &str,
    source_name: &str,
) -> Result<Vec<FeedEntry>> {
    let body = client.get(url).send().await?.text().await?;
    let feed = parser::parse(body.as_bytes()).map_err(|e| FinanceError::FeedParseError {
        url: url.to_string(),
        context: e.to_string(),
    })?;
    let source = source_name.to_string();
    let mut entries = Vec::with_capacity(feed.entries.len());
    for item in feed.entries {
        // A usable entry needs a non-empty title...
        let title = match item.title {
            Some(t) => t.content.trim().to_string(),
            None => continue,
        };
        if title.is_empty() {
            continue;
        }
        // ...and a non-empty link; only the first listed link is used.
        let link = item
            .links
            .into_iter()
            .next()
            .map(|l| l.href)
            .unwrap_or_default();
        if link.is_empty() {
            continue;
        }
        // Fall back to the last-updated timestamp when no publish date exists.
        let published = item.published.or(item.updated).map(|dt| dt.to_rfc3339());
        // Prefer the summary; fall back to the full content body.
        let summary = item
            .summary
            .map(|s| s.content)
            .or_else(|| item.content.and_then(|c| c.body));
        entries.push(FeedEntry {
            title,
            url: link,
            published,
            summary,
            source: source.clone(),
        });
    }
    Ok(entries)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// URL sanity: static sources are HTTPS, dynamic sources embed their
    /// input, and a handful of sources point at the expected domain.
    #[test]
    fn test_feed_source_urls() {
        let https_sources = [
            FeedSource::FederalReserve,
            FeedSource::SecPressReleases,
            FeedSource::Bloomberg,
            FeedSource::FinancialTimes,
            FeedSource::NytBusiness,
            FeedSource::GuardianBusiness,
            FeedSource::Investing,
            FeedSource::Bea,
            FeedSource::Ecb,
            FeedSource::Cfpb,
        ];
        for source in https_sources {
            assert!(source.url().starts_with("https://"));
        }
        assert!(
            FeedSource::SecFilings("10-K".to_string())
                .url()
                .contains("10-K")
        );
        assert_eq!(
            FeedSource::Custom("https://example.com/feed.rss".to_string()).url(),
            "https://example.com/feed.rss"
        );
        let domain_fragments = [
            (FeedSource::WsjMarkets, "dj.com"),
            (FeedSource::Fortune, "fortune.com"),
            (FeedSource::BusinessWire, "businesswire.com"),
            (FeedSource::CoinDesk, "coindesk.com"),
            (FeedSource::CoinTelegraph, "cointelegraph.com"),
            (FeedSource::TechCrunch, "techcrunch.com"),
            (FeedSource::HackerNews, "hnrss.org"),
        ];
        for (source, fragment) in domain_fragments {
            assert!(source.url().contains(fragment));
        }
    }

    /// Display names must match the exact strings entries are tagged with.
    #[test]
    fn test_feed_source_names() {
        let expected = [
            (FeedSource::FederalReserve, "Federal Reserve"),
            (FeedSource::MarketWatch, "MarketWatch"),
            (FeedSource::Bloomberg, "Bloomberg"),
            (FeedSource::FinancialTimes, "Financial Times"),
            (FeedSource::NytBusiness, "New York Times"),
            (FeedSource::GuardianBusiness, "The Guardian"),
            (FeedSource::Investing, "Investing.com"),
            (FeedSource::Bea, "Bureau of Economic Analysis"),
            (FeedSource::Ecb, "European Central Bank"),
            (FeedSource::Cfpb, "CFPB"),
            (FeedSource::WsjMarkets, "Wall Street Journal"),
            (FeedSource::Fortune, "Fortune"),
            (FeedSource::BusinessWire, "Business Wire"),
            (FeedSource::CoinDesk, "CoinDesk"),
            (FeedSource::CoinTelegraph, "CoinTelegraph"),
            (FeedSource::TechCrunch, "TechCrunch"),
            (FeedSource::HackerNews, "Hacker News"),
        ];
        for (source, name) in expected {
            assert_eq!(source.name(), name);
        }
    }

    /// Live-network smoke test; run explicitly with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_fed_reserve() {
        let result = fetch(FeedSource::FederalReserve).await;
        assert!(result.is_ok(), "Expected ok, got: {:?}", result.err());
        let items = result.unwrap();
        assert!(!items.is_empty());
        for item in items.iter().take(3) {
            assert!(!item.title.is_empty());
            assert!(!item.url.is_empty());
        }
    }

    /// Live-network smoke test; also verifies the source tag on entries.
    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bloomberg() {
        let result = fetch(FeedSource::Bloomberg).await;
        assert!(result.is_ok(), "Expected ok, got: {:?}", result.err());
        let items = result.unwrap();
        assert!(!items.is_empty());
        for item in items.iter().take(3) {
            assert!(!item.title.is_empty());
            assert!(!item.url.is_empty());
            assert_eq!(item.source, "Bloomberg");
        }
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_financial_times() {
        let result = fetch(FeedSource::FinancialTimes).await;
        assert!(result.is_ok(), "Expected ok, got: {:?}", result.err());
        let items = result.unwrap();
        assert!(!items.is_empty());
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_bea() {
        let result = fetch(FeedSource::Bea).await;
        assert!(result.is_ok(), "Expected ok, got: {:?}", result.err());
        let items = result.unwrap();
        assert!(!items.is_empty());
        assert_eq!(items[0].source, "Bureau of Economic Analysis");
    }

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_fetch_ecb() {
        let result = fetch(FeedSource::Ecb).await;
        assert!(result.is_ok(), "Expected ok, got: {:?}", result.err());
        let items = result.unwrap();
        assert!(!items.is_empty());
    }
}