finance_query/adapters/edgar/mod.rs
1//! SEC EDGAR API client.
2//!
3//! Provides access to SEC EDGAR data including filing history,
4//! structured XBRL financial data, and full-text search.
5//!
6//! All requests are rate-limited to 10 per second as required by SEC.
7//! Rate limiting and CIK caching are managed via a process-global singleton.
8//!
9//! # Quick Start
10//!
//! Initialize once at application startup (or set the `EDGAR_EMAIL` environment variable instead), then use anywhere:
12//!
13//! ```no_run
14//! use finance_query::edgar;
15//!
16//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Initialize once (may be skipped when the EDGAR_EMAIL environment variable is set)
18//! edgar::init("user@example.com")?;
19//!
20//! // Use anywhere
21//! let cik = edgar::resolve_cik("AAPL").await?;
22//! let submissions = edgar::submissions(cik).await?;
23//! let facts = edgar::company_facts(cik).await?;
24//!
25//! // Search filings
26//! let results = edgar::search(
27//! "artificial intelligence",
28//! Some(&["10-K"]),
29//! Some("2024-01-01"),
30//! None,
31//! None,
32//! None,
33//! ).await?;
34//! # Ok(())
35//! # }
36//! ```
37
38mod client;
39mod endpoints;
40
41use crate::error::{FinanceError, Result};
42use crate::models::filings::{
43 CompanyFacts, EdgarFilingIndex, EdgarSearchResults, EdgarSubmissions,
44};
45use crate::rate_limiter::RateLimiter;
46use client::EdgarClientBuilder;
47use std::collections::HashMap;
48use std::sync::{Arc, OnceLock};
49use std::time::Duration;
50use tokio::sync::RwLock;
51
52/// SEC EDGAR rate limit: 10 requests per second.
53const EDGAR_RATE_PER_SEC: f64 = 10.0;
54
55/// Stable configuration stored in the EDGAR process-global singleton.
56///
57/// Only configuration, the rate limiter, and the CIK cache are stored — NOT
58/// the `reqwest::Client`. `reqwest::Client` internally spawns hyper
59/// connection-pool tasks on whichever tokio runtime first uses them; when that
60/// runtime is dropped (e.g. at the end of a `#[tokio::test]`), those tasks die
61/// and subsequent calls from a different runtime receive `DispatchGone`. A fresh
62/// `reqwest::Client` is built per public function call via
63/// [`EdgarClientBuilder::build_with_shared_state`], reusing the shared rate
64/// limiter and CIK cache.
65struct EdgarSingleton {
66 email: String,
67 app_name: String,
68 timeout: Duration,
69 rate_limiter: Arc<RateLimiter>,
70 cik_cache: Arc<RwLock<Option<HashMap<String, u64>>>>,
71}
72
73static EDGAR_SINGLETON: OnceLock<EdgarSingleton> = OnceLock::new();
74
75/// Initialize the global EDGAR client with a contact email.
76///
77/// This function must be called once before using any EDGAR functions.
78/// The SEC requires all automated requests to include a User-Agent header
79/// with a contact email address.
80///
81/// # Arguments
82///
83/// * `email` - Contact email address (included in User-Agent header)
84///
85/// # Example
86///
87/// ```no_run
88/// use finance_query::edgar;
89///
90/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
91/// edgar::init("user@example.com")?;
92/// # Ok(())
93/// # }
94/// ```
95///
96/// # Errors
97///
98/// Returns an error if EDGAR has already been initialized.
99pub fn init(email: impl Into<String>) -> Result<()> {
100 init_with_config(email, "finance-query", Duration::from_secs(30))
101}
102
103/// Initialize the global EDGAR client with full configuration.
104///
105/// Use this for custom app name and timeout settings.
106///
107/// # Arguments
108///
109/// * `email` - Contact email address (required by SEC)
110/// * `app_name` - Application name (included in User-Agent)
111/// * `timeout` - HTTP request timeout duration
112///
113/// # Example
114///
115/// ```no_run
116/// use finance_query::edgar;
117/// use std::time::Duration;
118///
119/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
120/// edgar::init_with_config(
121/// "user@example.com",
122/// "my-app",
123/// Duration::from_secs(60),
124/// )?;
125/// # Ok(())
126/// # }
127/// ```
128pub fn init_with_config(
129 email: impl Into<String>,
130 app_name: impl Into<String>,
131 timeout: Duration,
132) -> Result<()> {
133 EDGAR_SINGLETON
134 .set(EdgarSingleton {
135 email: email.into(),
136 app_name: app_name.into(),
137 timeout,
138 rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
139 cik_cache: Arc::new(RwLock::new(None)),
140 })
141 .map_err(|_| FinanceError::InvalidParameter {
142 param: "edgar".to_string(),
143 reason: "EDGAR client already initialized".to_string(),
144 })
145}
146
147/// Build a fresh [`EdgarClient`](client::EdgarClient) from the singleton's
148/// config, reusing the shared rate limiter and CIK cache.
149///
150/// If EDGAR hasn't been explicitly initialized, falls back to the `EDGAR_EMAIL`
151/// environment variable as a convenience (consistent with other adapters).
152fn build_client() -> Result<client::EdgarClient> {
153 if EDGAR_SINGLETON.get().is_none()
154 && let Ok(email) = std::env::var("EDGAR_EMAIL")
155 {
156 let _ = EDGAR_SINGLETON.set(EdgarSingleton {
157 email,
158 app_name: "finance-query".to_string(),
159 timeout: Duration::from_secs(30),
160 rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
161 cik_cache: Arc::new(RwLock::new(None)),
162 });
163 }
164 let s = EDGAR_SINGLETON
165 .get()
166 .ok_or_else(|| FinanceError::InvalidParameter {
167 param: "edgar".to_string(),
168 reason: "EDGAR_EMAIL not set. Call edgar::init(email) or set EDGAR_EMAIL env var."
169 .to_string(),
170 })?;
171 EdgarClientBuilder::new(&s.email)
172 .app_name(&s.app_name)
173 .timeout(s.timeout)
174 .build_with_shared_state(Arc::clone(&s.rate_limiter), Arc::clone(&s.cik_cache))
175}
176
177fn accession_parts(accession_number: &str) -> Result<(String, String)> {
178 let cik_part = accession_number
179 .split('-')
180 .next()
181 .unwrap_or("")
182 .trim_start_matches('0')
183 .to_string();
184 let accession_no_dashes = accession_number.replace('-', "");
185
186 if cik_part.is_empty() || accession_no_dashes.is_empty() {
187 return Err(FinanceError::InvalidParameter {
188 param: "accession_number".to_string(),
189 reason: "Invalid accession number format".to_string(),
190 });
191 }
192
193 Ok((cik_part, accession_no_dashes))
194}
195
196/// Resolve a ticker symbol to its SEC CIK number.
197///
198/// The ticker-to-CIK mapping is fetched once and cached process-wide.
199/// Lookups are case-insensitive.
200///
201/// # Example
202///
203/// ```no_run
204/// use finance_query::edgar;
205///
206/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
207/// edgar::init("user@example.com")?;
208/// let cik = edgar::resolve_cik("AAPL").await?;
209/// assert_eq!(cik, 320193);
210/// # Ok(())
211/// # }
212/// ```
213///
214/// # Errors
215///
216/// Returns an error if:
217/// - EDGAR has not been initialized (call `init()` first)
218/// - Symbol not found in SEC database
219/// - Network request fails
220pub async fn resolve_cik(symbol: &str) -> Result<u64> {
221 build_client()?.resolve_cik(symbol).await
222}
223
224/// Fetch filing history and company metadata for a CIK.
225///
226/// Returns the most recent ~1000 filings inline, with references to
227/// additional history files for older filings.
228///
229/// # Example
230///
231/// ```no_run
232/// use finance_query::edgar;
233///
234/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
235/// edgar::init("user@example.com")?;
236/// let cik = edgar::resolve_cik("AAPL").await?;
237/// let submissions = edgar::submissions(cik).await?;
238/// println!("Company: {:?}", submissions.name);
239/// # Ok(())
240/// # }
241/// ```
242pub async fn submissions(cik: u64) -> Result<EdgarSubmissions> {
243 build_client()?.submissions(cik).await
244}
245
246/// Fetch structured XBRL financial data for a CIK.
247///
248/// Returns all extracted XBRL facts organized by taxonomy (us-gaap, ifrs, dei).
249/// This can be a large response (several MB for major companies).
250///
251/// # Example
252///
253/// ```no_run
254/// use finance_query::edgar;
255///
256/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
257/// edgar::init("user@example.com")?;
258/// let cik = edgar::resolve_cik("AAPL").await?;
259/// let facts = edgar::company_facts(cik).await?;
260/// println!("Entity: {:?}", facts.entity_name);
261/// # Ok(())
262/// # }
263/// ```
264pub async fn company_facts(cik: u64) -> Result<CompanyFacts> {
265 build_client()?.company_facts(cik).await
266}
267
268/// Fetch the filing index for a specific accession number.
269///
270/// This provides the file list for a filing, which can be used to locate
271/// the primary HTML document and file sizes.
272///
273/// # Example
274///
275/// ```no_run
276/// use finance_query::edgar;
277///
278/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
279/// edgar::init("user@example.com")?;
280/// let index = edgar::filing_index("0000320193-24-000123").await?;
281/// println!("Files: {}", index.directory.item.len());
282/// # Ok(())
283/// # }
284/// ```
285pub async fn filing_index(accession_number: &str) -> Result<EdgarFilingIndex> {
286 build_client()?.filing_index(accession_number).await
287}
288
289/// Search SEC EDGAR filings by text content.
290///
291/// # Arguments
292///
293/// * `query` - Search term or phrase
294/// * `forms` - Optional form type filter (e.g., `&["10-K", "10-Q"]`)
295/// * `start_date` - Optional start date (YYYY-MM-DD)
296/// * `end_date` - Optional end date (YYYY-MM-DD)
297/// * `from` - Optional pagination offset (default: 0)
298/// * `size` - Optional page size (default: 100, max: 100)
299///
300/// # Example
301///
302/// ```no_run
303/// use finance_query::edgar;
304///
305/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
306/// edgar::init("user@example.com")?;
307/// let results = edgar::search(
308/// "artificial intelligence",
309/// Some(&["10-K"]),
310/// Some("2024-01-01"),
311/// None,
312/// Some(0),
313/// Some(100),
314/// ).await?;
315/// if let Some(hits_container) = &results.hits {
316/// println!("Found {} results", hits_container.total.as_ref().and_then(|t| t.value).unwrap_or(0));
317/// }
318/// # Ok(())
319/// # }
320/// ```
321pub async fn search(
322 query: &str,
323 forms: Option<&[&str]>,
324 start_date: Option<&str>,
325 end_date: Option<&str>,
326 from: Option<usize>,
327 size: Option<usize>,
328) -> Result<EdgarSearchResults> {
329 build_client()?
330 .search(query, forms, start_date, end_date, from, size)
331 .await
332}
333
334// ============================================================================
335// Canonical model conversion functions
336// ============================================================================
337
338/// Fetch canonical ProviderFilings for a ticker symbol.
339pub async fn fetch_filings_response(
340 symbol: &str,
341) -> Result<crate::models::filings::ProviderFilings> {
342 use crate::models::filings::{ProviderFiling, ProviderFilings};
343
344 let cik_num = resolve_cik(symbol).await?;
345 let subs = submissions(cik_num).await?;
346
347 let cik = subs.cik.clone().unwrap_or_default();
348 let company_name = subs.name.clone();
349 let filings = subs
350 .filings
351 .and_then(|f| f.recent)
352 .map(|r| r.to_filings())
353 .unwrap_or_default()
354 .into_iter()
355 .map(|f| {
356 let accession_no_dashes = f.accession_number.replace('-', "");
357 let url = if !cik.is_empty()
358 && !accession_no_dashes.is_empty()
359 && !f.primary_document.is_empty()
360 {
361 Some(format!(
362 "https://www.sec.gov/Archives/edgar/data/{}/{}/{}",
363 cik.trim_start_matches('0'),
364 accession_no_dashes,
365 f.primary_document
366 ))
367 } else {
368 None
369 };
370 ProviderFiling {
371 accession_number: Some(f.accession_number),
372 filing_date: Some(f.filing_date),
373 filing_type: Some(f.form),
374 filing_url: url,
375 company_name: company_name.clone(),
376 cik: Some(cik.clone()),
377 }
378 })
379 .collect();
380
381 Ok(ProviderFilings {
382 symbol: symbol.to_string(),
383 filings,
384 })
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    // The EDGAR state is process-global and can be set only once, so any of
    // these tests may find it already initialized by another test. Each test
    // is written to hold regardless of execution order.

    #[test]
    fn test_init_sets_singleton() {
        // A rejected init is acceptable only as the "already initialized"
        // error; either way the global state must be present afterwards.
        if let Err(err) = init("test@example.com") {
            assert!(matches!(err, FinanceError::InvalidParameter { .. }));
        }
        assert!(EDGAR_SINGLETON.get().is_some());
    }

    #[test]
    fn test_double_init_fails() {
        // Whether or not the first call succeeds, the state is set after it,
        // so the second call must be rejected.
        let _ = init("first@example.com");
        let result = init("second@example.com");
        assert!(matches!(result, Err(FinanceError::InvalidParameter { .. })));
    }

    #[test]
    fn test_singleton_is_set_after_init() {
        let _ = init("test@example.com");
        assert!(EDGAR_SINGLETON.get().is_some());
    }
}