finance_query/edgar/mod.rs
//! SEC EDGAR API client.
//!
//! Provides access to SEC EDGAR data including filing history,
//! structured XBRL financial data, and full-text search.
//!
//! All requests are rate-limited to 10 per second as required by the SEC.
//! Rate limiting and CIK caching are managed via a process-global singleton.
//!
//! # Quick Start
//!
//! Initialize once at application startup, then use anywhere:
//!
//! ```no_run
//! use finance_query::edgar;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Initialize once (required)
//! edgar::init("user@example.com")?;
//!
//! // Use anywhere
//! let cik = edgar::resolve_cik("AAPL").await?;
//! let submissions = edgar::submissions(cik).await?;
//! let facts = edgar::company_facts(cik).await?;
//!
//! // Search filings
//! let results = edgar::search(
//!     "artificial intelligence",
//!     Some(&["10-K"]),
//!     Some("2024-01-01"),
//!     None,
//!     None,
//!     None,
//! ).await?;
//! # Ok(())
//! # }
//! ```
37
38mod client;
39
40use crate::error::{FinanceError, Result};
41use crate::models::edgar::{CompanyFacts, EdgarFilingIndex, EdgarSearchResults, EdgarSubmissions};
42use crate::rate_limiter::RateLimiter;
43use client::EdgarClientBuilder;
44use std::collections::HashMap;
45use std::sync::{Arc, OnceLock};
46use std::time::Duration;
47use tokio::sync::RwLock;
48
/// Request budget for SEC EDGAR, expressed in requests per second.
///
/// This value is handed to `RateLimiter::new` whenever the process-global
/// EDGAR state is created.
const EDGAR_RATE_PER_SEC: f64 = 10.0;
51
52/// Stable configuration stored in the EDGAR process-global singleton.
53///
54/// Only configuration, the rate limiter, and the CIK cache are stored — NOT
55/// the `reqwest::Client`. `reqwest::Client` internally spawns hyper
56/// connection-pool tasks on whichever tokio runtime first uses them; when that
57/// runtime is dropped (e.g. at the end of a `#[tokio::test]`), those tasks die
58/// and subsequent calls from a different runtime receive `DispatchGone`. A fresh
59/// `reqwest::Client` is built per public function call via
60/// [`EdgarClientBuilder::build_with_shared_state`], reusing the shared rate
61/// limiter and CIK cache.
62struct EdgarSingleton {
63 email: String,
64 app_name: String,
65 timeout: Duration,
66 rate_limiter: Arc<RateLimiter>,
67 cik_cache: Arc<RwLock<Option<HashMap<String, u64>>>>,
68}
69
70static EDGAR_SINGLETON: OnceLock<EdgarSingleton> = OnceLock::new();
71
72/// Initialize the global EDGAR client with a contact email.
73///
74/// This function must be called once before using any EDGAR functions.
75/// The SEC requires all automated requests to include a User-Agent header
76/// with a contact email address.
77///
78/// # Arguments
79///
80/// * `email` - Contact email address (included in User-Agent header)
81///
82/// # Example
83///
84/// ```no_run
85/// use finance_query::edgar;
86///
87/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
88/// edgar::init("user@example.com")?;
89/// # Ok(())
90/// # }
91/// ```
92///
93/// # Errors
94///
95/// Returns an error if EDGAR has already been initialized.
96pub fn init(email: impl Into<String>) -> Result<()> {
97 EDGAR_SINGLETON
98 .set(EdgarSingleton {
99 email: email.into(),
100 app_name: "finance-query".to_string(),
101 timeout: Duration::from_secs(30),
102 rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
103 cik_cache: Arc::new(RwLock::new(None)),
104 })
105 .map_err(|_| FinanceError::InvalidParameter {
106 param: "edgar".to_string(),
107 reason: "EDGAR client already initialized".to_string(),
108 })
109}
110
111/// Initialize the global EDGAR client with full configuration.
112///
113/// Use this for custom app name and timeout settings.
114///
115/// # Arguments
116///
117/// * `email` - Contact email address (required by SEC)
118/// * `app_name` - Application name (included in User-Agent)
119/// * `timeout` - HTTP request timeout duration
120///
121/// # Example
122///
123/// ```no_run
124/// use finance_query::edgar;
125/// use std::time::Duration;
126///
127/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
128/// edgar::init_with_config(
129/// "user@example.com",
130/// "my-app",
131/// Duration::from_secs(60),
132/// )?;
133/// # Ok(())
134/// # }
135/// ```
136pub fn init_with_config(
137 email: impl Into<String>,
138 app_name: impl Into<String>,
139 timeout: Duration,
140) -> Result<()> {
141 EDGAR_SINGLETON
142 .set(EdgarSingleton {
143 email: email.into(),
144 app_name: app_name.into(),
145 timeout,
146 rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
147 cik_cache: Arc::new(RwLock::new(None)),
148 })
149 .map_err(|_| FinanceError::InvalidParameter {
150 param: "edgar".to_string(),
151 reason: "EDGAR client already initialized".to_string(),
152 })
153}
154
155/// Build a fresh [`EdgarClient`](client::EdgarClient) from the singleton's
156/// config, reusing the shared rate limiter and CIK cache.
157fn build_client() -> Result<client::EdgarClient> {
158 let s = EDGAR_SINGLETON
159 .get()
160 .ok_or_else(|| FinanceError::InvalidParameter {
161 param: "edgar".to_string(),
162 reason: "EDGAR not initialized. Call edgar::init(email) first.".to_string(),
163 })?;
164 EdgarClientBuilder::new(&s.email)
165 .app_name(&s.app_name)
166 .timeout(s.timeout)
167 .build_with_shared_state(Arc::clone(&s.rate_limiter), Arc::clone(&s.cik_cache))
168}
169
170fn accession_parts(accession_number: &str) -> Result<(String, String)> {
171 let cik_part = accession_number
172 .split('-')
173 .next()
174 .unwrap_or("")
175 .trim_start_matches('0')
176 .to_string();
177 let accession_no_dashes = accession_number.replace('-', "");
178
179 if cik_part.is_empty() || accession_no_dashes.is_empty() {
180 return Err(FinanceError::InvalidParameter {
181 param: "accession_number".to_string(),
182 reason: "Invalid accession number format".to_string(),
183 });
184 }
185
186 Ok((cik_part, accession_no_dashes))
187}
188
189/// Resolve a ticker symbol to its SEC CIK number.
190///
191/// The ticker-to-CIK mapping is fetched once and cached process-wide.
192/// Lookups are case-insensitive.
193///
194/// # Example
195///
196/// ```no_run
197/// use finance_query::edgar;
198///
199/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
200/// edgar::init("user@example.com")?;
201/// let cik = edgar::resolve_cik("AAPL").await?;
202/// assert_eq!(cik, 320193);
203/// # Ok(())
204/// # }
205/// ```
206///
207/// # Errors
208///
209/// Returns an error if:
210/// - EDGAR has not been initialized (call `init()` first)
211/// - Symbol not found in SEC database
212/// - Network request fails
213pub async fn resolve_cik(symbol: &str) -> Result<u64> {
214 build_client()?.resolve_cik(symbol).await
215}
216
217/// Fetch filing history and company metadata for a CIK.
218///
219/// Returns the most recent ~1000 filings inline, with references to
220/// additional history files for older filings.
221///
222/// # Example
223///
224/// ```no_run
225/// use finance_query::edgar;
226///
227/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
228/// edgar::init("user@example.com")?;
229/// let cik = edgar::resolve_cik("AAPL").await?;
230/// let submissions = edgar::submissions(cik).await?;
231/// println!("Company: {:?}", submissions.name);
232/// # Ok(())
233/// # }
234/// ```
235pub async fn submissions(cik: u64) -> Result<EdgarSubmissions> {
236 build_client()?.submissions(cik).await
237}
238
239/// Fetch structured XBRL financial data for a CIK.
240///
241/// Returns all extracted XBRL facts organized by taxonomy (us-gaap, ifrs, dei).
242/// This can be a large response (several MB for major companies).
243///
244/// # Example
245///
246/// ```no_run
247/// use finance_query::edgar;
248///
249/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
250/// edgar::init("user@example.com")?;
251/// let cik = edgar::resolve_cik("AAPL").await?;
252/// let facts = edgar::company_facts(cik).await?;
253/// println!("Entity: {:?}", facts.entity_name);
254/// # Ok(())
255/// # }
256/// ```
257pub async fn company_facts(cik: u64) -> Result<CompanyFacts> {
258 build_client()?.company_facts(cik).await
259}
260
261/// Fetch the filing index for a specific accession number.
262///
263/// This provides the file list for a filing, which can be used to locate
264/// the primary HTML document and file sizes.
265///
266/// # Example
267///
268/// ```no_run
269/// use finance_query::edgar;
270///
271/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
272/// edgar::init("user@example.com")?;
273/// let index = edgar::filing_index("0000320193-24-000123").await?;
274/// println!("Files: {}", index.directory.item.len());
275/// # Ok(())
276/// # }
277/// ```
278pub async fn filing_index(accession_number: &str) -> Result<EdgarFilingIndex> {
279 build_client()?.filing_index(accession_number).await
280}
281
282/// Search SEC EDGAR filings by text content.
283///
284/// # Arguments
285///
286/// * `query` - Search term or phrase
287/// * `forms` - Optional form type filter (e.g., `&["10-K", "10-Q"]`)
288/// * `start_date` - Optional start date (YYYY-MM-DD)
289/// * `end_date` - Optional end date (YYYY-MM-DD)
290/// * `from` - Optional pagination offset (default: 0)
291/// * `size` - Optional page size (default: 100, max: 100)
292///
293/// # Example
294///
295/// ```no_run
296/// use finance_query::edgar;
297///
298/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
299/// edgar::init("user@example.com")?;
300/// let results = edgar::search(
301/// "artificial intelligence",
302/// Some(&["10-K"]),
303/// Some("2024-01-01"),
304/// None,
305/// Some(0),
306/// Some(100),
307/// ).await?;
308/// if let Some(hits_container) = &results.hits {
309/// println!("Found {} results", hits_container.total.as_ref().and_then(|t| t.value).unwrap_or(0));
310/// }
311/// # Ok(())
312/// # }
313/// ```
314pub async fn search(
315 query: &str,
316 forms: Option<&[&str]>,
317 start_date: Option<&str>,
318 end_date: Option<&str>,
319 from: Option<usize>,
320 size: Option<usize>,
321) -> Result<EdgarSearchResults> {
322 build_client()?
323 .search(query, forms, start_date, end_date, from, size)
324 .await
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    // The singleton is process-global and all tests in this module share one
    // process, so no test may assume it is the first caller of `init`. Each
    // test therefore tolerates an earlier initialization by a sibling test.

    /// `init` either succeeds or reports "already initialized"; in both cases
    /// the singleton must be populated afterwards.
    #[test]
    fn test_init_sets_singleton() {
        if let Err(err) = init("test@example.com") {
            assert!(matches!(err, FinanceError::InvalidParameter { .. }));
        }
        assert!(EDGAR_SINGLETON.get().is_some());
    }

    /// A second `init` call is always rejected.
    #[test]
    fn test_double_init_fails() {
        let _ = init("first@example.com");
        let result = init("second@example.com");
        assert!(matches!(result, Err(FinanceError::InvalidParameter { .. })));
    }

    /// Regardless of which test initialized it, the singleton is set once
    /// `init` has been called.
    #[test]
    fn test_singleton_is_set_after_init() {
        let _ = init("test@example.com");
        assert!(EDGAR_SINGLETON.get().is_some());
    }

    /// A well-formed accession number yields the zero-stripped prefix and the
    /// dash-free form.
    #[test]
    fn test_accession_parts_valid() {
        let Ok((cik, accession)) = accession_parts("0000320193-24-000123") else {
            panic!("expected a valid accession number to parse");
        };
        assert_eq!(cik, "320193");
        assert_eq!(accession, "000032019324000123");
    }

    /// An empty or all-zero prefix is rejected.
    #[test]
    fn test_accession_parts_rejects_missing_prefix() {
        assert!(matches!(
            accession_parts(""),
            Err(FinanceError::InvalidParameter { .. })
        ));
        assert!(matches!(
            accession_parts("0000-24-000123"),
            Err(FinanceError::InvalidParameter { .. })
        ));
    }
}