Skip to main content

finance_query/edgar/
mod.rs

1//! SEC EDGAR API client.
2//!
3//! Provides access to SEC EDGAR data including filing history,
4//! structured XBRL financial data, and full-text search.
5//!
6//! All requests are rate-limited to 10 per second as required by SEC.
7//! Rate limiting and CIK caching are managed via a process-global singleton.
8//!
9//! # Quick Start
10//!
11//! Initialize once at application startup, then use anywhere:
12//!
13//! ```no_run
14//! use finance_query::edgar;
15//!
16//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
17//! // Initialize once (required)
18//! edgar::init("user@example.com")?;
19//!
20//! // Use anywhere
21//! let cik = edgar::resolve_cik("AAPL").await?;
22//! let submissions = edgar::submissions(cik).await?;
23//! let facts = edgar::company_facts(cik).await?;
24//!
25//! // Search filings
26//! let results = edgar::search(
27//!     "artificial intelligence",
28//!     Some(&["10-K"]),
29//!     Some("2024-01-01"),
30//!     None,
31//!     None,
32//!     None,
33//! ).await?;
34//! # Ok(())
35//! # }
36//! ```
37
38mod client;
39
40use crate::error::{FinanceError, Result};
41use crate::models::edgar::{CompanyFacts, EdgarFilingIndex, EdgarSearchResults, EdgarSubmissions};
42use crate::rate_limiter::RateLimiter;
43use client::EdgarClientBuilder;
44use std::collections::HashMap;
45use std::sync::{Arc, OnceLock};
46use std::time::Duration;
47use tokio::sync::RwLock;
48
49/// SEC EDGAR rate limit: 10 requests per second.
50const EDGAR_RATE_PER_SEC: f64 = 10.0;
51
52/// Stable configuration stored in the EDGAR process-global singleton.
53///
54/// Only configuration, the rate limiter, and the CIK cache are stored — NOT
55/// the `reqwest::Client`. `reqwest::Client` internally spawns hyper
56/// connection-pool tasks on whichever tokio runtime first uses them; when that
57/// runtime is dropped (e.g. at the end of a `#[tokio::test]`), those tasks die
58/// and subsequent calls from a different runtime receive `DispatchGone`. A fresh
59/// `reqwest::Client` is built per public function call via
60/// [`EdgarClientBuilder::build_with_shared_state`], reusing the shared rate
61/// limiter and CIK cache.
62struct EdgarSingleton {
63    email: String,
64    app_name: String,
65    timeout: Duration,
66    rate_limiter: Arc<RateLimiter>,
67    cik_cache: Arc<RwLock<Option<HashMap<String, u64>>>>,
68}
69
70static EDGAR_SINGLETON: OnceLock<EdgarSingleton> = OnceLock::new();
71
72/// Initialize the global EDGAR client with a contact email.
73///
74/// This function must be called once before using any EDGAR functions.
75/// The SEC requires all automated requests to include a User-Agent header
76/// with a contact email address.
77///
78/// # Arguments
79///
80/// * `email` - Contact email address (included in User-Agent header)
81///
82/// # Example
83///
84/// ```no_run
85/// use finance_query::edgar;
86///
87/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
88/// edgar::init("user@example.com")?;
89/// # Ok(())
90/// # }
91/// ```
92///
93/// # Errors
94///
95/// Returns an error if EDGAR has already been initialized.
96pub fn init(email: impl Into<String>) -> Result<()> {
97    EDGAR_SINGLETON
98        .set(EdgarSingleton {
99            email: email.into(),
100            app_name: "finance-query".to_string(),
101            timeout: Duration::from_secs(30),
102            rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
103            cik_cache: Arc::new(RwLock::new(None)),
104        })
105        .map_err(|_| FinanceError::InvalidParameter {
106            param: "edgar".to_string(),
107            reason: "EDGAR client already initialized".to_string(),
108        })
109}
110
111/// Initialize the global EDGAR client with full configuration.
112///
113/// Use this for custom app name and timeout settings.
114///
115/// # Arguments
116///
117/// * `email` - Contact email address (required by SEC)
118/// * `app_name` - Application name (included in User-Agent)
119/// * `timeout` - HTTP request timeout duration
120///
121/// # Example
122///
123/// ```no_run
124/// use finance_query::edgar;
125/// use std::time::Duration;
126///
127/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
128/// edgar::init_with_config(
129///     "user@example.com",
130///     "my-app",
131///     Duration::from_secs(60),
132/// )?;
133/// # Ok(())
134/// # }
135/// ```
136pub fn init_with_config(
137    email: impl Into<String>,
138    app_name: impl Into<String>,
139    timeout: Duration,
140) -> Result<()> {
141    EDGAR_SINGLETON
142        .set(EdgarSingleton {
143            email: email.into(),
144            app_name: app_name.into(),
145            timeout,
146            rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
147            cik_cache: Arc::new(RwLock::new(None)),
148        })
149        .map_err(|_| FinanceError::InvalidParameter {
150            param: "edgar".to_string(),
151            reason: "EDGAR client already initialized".to_string(),
152        })
153}
154
155/// Build a fresh [`EdgarClient`](client::EdgarClient) from the singleton's
156/// config, reusing the shared rate limiter and CIK cache.
157fn build_client() -> Result<client::EdgarClient> {
158    let s = EDGAR_SINGLETON
159        .get()
160        .ok_or_else(|| FinanceError::InvalidParameter {
161            param: "edgar".to_string(),
162            reason: "EDGAR not initialized. Call edgar::init(email) first.".to_string(),
163        })?;
164    EdgarClientBuilder::new(&s.email)
165        .app_name(&s.app_name)
166        .timeout(s.timeout)
167        .build_with_shared_state(Arc::clone(&s.rate_limiter), Arc::clone(&s.cik_cache))
168}
169
170fn accession_parts(accession_number: &str) -> Result<(String, String)> {
171    let cik_part = accession_number
172        .split('-')
173        .next()
174        .unwrap_or("")
175        .trim_start_matches('0')
176        .to_string();
177    let accession_no_dashes = accession_number.replace('-', "");
178
179    if cik_part.is_empty() || accession_no_dashes.is_empty() {
180        return Err(FinanceError::InvalidParameter {
181            param: "accession_number".to_string(),
182            reason: "Invalid accession number format".to_string(),
183        });
184    }
185
186    Ok((cik_part, accession_no_dashes))
187}
188
189/// Resolve a ticker symbol to its SEC CIK number.
190///
191/// The ticker-to-CIK mapping is fetched once and cached process-wide.
192/// Lookups are case-insensitive.
193///
194/// # Example
195///
196/// ```no_run
197/// use finance_query::edgar;
198///
199/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
200/// edgar::init("user@example.com")?;
201/// let cik = edgar::resolve_cik("AAPL").await?;
202/// assert_eq!(cik, 320193);
203/// # Ok(())
204/// # }
205/// ```
206///
207/// # Errors
208///
209/// Returns an error if:
210/// - EDGAR has not been initialized (call `init()` first)
211/// - Symbol not found in SEC database
212/// - Network request fails
213pub async fn resolve_cik(symbol: &str) -> Result<u64> {
214    build_client()?.resolve_cik(symbol).await
215}
216
217/// Fetch filing history and company metadata for a CIK.
218///
219/// Returns the most recent ~1000 filings inline, with references to
220/// additional history files for older filings.
221///
222/// # Example
223///
224/// ```no_run
225/// use finance_query::edgar;
226///
227/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
228/// edgar::init("user@example.com")?;
229/// let cik = edgar::resolve_cik("AAPL").await?;
230/// let submissions = edgar::submissions(cik).await?;
231/// println!("Company: {:?}", submissions.name);
232/// # Ok(())
233/// # }
234/// ```
235pub async fn submissions(cik: u64) -> Result<EdgarSubmissions> {
236    build_client()?.submissions(cik).await
237}
238
239/// Fetch structured XBRL financial data for a CIK.
240///
241/// Returns all extracted XBRL facts organized by taxonomy (us-gaap, ifrs, dei).
242/// This can be a large response (several MB for major companies).
243///
244/// # Example
245///
246/// ```no_run
247/// use finance_query::edgar;
248///
249/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
250/// edgar::init("user@example.com")?;
251/// let cik = edgar::resolve_cik("AAPL").await?;
252/// let facts = edgar::company_facts(cik).await?;
253/// println!("Entity: {:?}", facts.entity_name);
254/// # Ok(())
255/// # }
256/// ```
257pub async fn company_facts(cik: u64) -> Result<CompanyFacts> {
258    build_client()?.company_facts(cik).await
259}
260
261/// Fetch the filing index for a specific accession number.
262///
263/// This provides the file list for a filing, which can be used to locate
264/// the primary HTML document and file sizes.
265///
266/// # Example
267///
268/// ```no_run
269/// use finance_query::edgar;
270///
271/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
272/// edgar::init("user@example.com")?;
273/// let index = edgar::filing_index("0000320193-24-000123").await?;
274/// println!("Files: {}", index.directory.item.len());
275/// # Ok(())
276/// # }
277/// ```
278pub async fn filing_index(accession_number: &str) -> Result<EdgarFilingIndex> {
279    build_client()?.filing_index(accession_number).await
280}
281
282/// Search SEC EDGAR filings by text content.
283///
284/// # Arguments
285///
286/// * `query` - Search term or phrase
287/// * `forms` - Optional form type filter (e.g., `&["10-K", "10-Q"]`)
288/// * `start_date` - Optional start date (YYYY-MM-DD)
289/// * `end_date` - Optional end date (YYYY-MM-DD)
290/// * `from` - Optional pagination offset (default: 0)
291/// * `size` - Optional page size (default: 100, max: 100)
292///
293/// # Example
294///
295/// ```no_run
296/// use finance_query::edgar;
297///
298/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
299/// edgar::init("user@example.com")?;
300/// let results = edgar::search(
301///     "artificial intelligence",
302///     Some(&["10-K"]),
303///     Some("2024-01-01"),
304///     None,
305///     Some(0),
306///     Some(100),
307/// ).await?;
308/// if let Some(hits_container) = &results.hits {
309///     println!("Found {} results", hits_container.total.as_ref().and_then(|t| t.value).unwrap_or(0));
310/// }
311/// # Ok(())
312/// # }
313/// ```
314pub async fn search(
315    query: &str,
316    forms: Option<&[&str]>,
317    start_date: Option<&str>,
318    end_date: Option<&str>,
319    from: Option<usize>,
320    size: Option<usize>,
321) -> Result<EdgarSearchResults> {
322    build_client()?
323        .search(query, forms, start_date, end_date, from, size)
324        .await
325}
326
#[cfg(test)]
mod tests {
    //! Unit tests for the EDGAR module.
    //!
    //! All tests share the process-global `EDGAR_SINGLETON` and may run in any
    //! order, so no test may assume that it is the one performing the first
    //! (successful) initialization.

    use super::*;

    #[test]
    fn test_init_sets_singleton() {
        let result = init("test@example.com");

        // Either this call won the race, or another test initialized first
        // and we got the documented "already initialized" error.
        assert!(matches!(
            result,
            Ok(()) | Err(FinanceError::InvalidParameter { .. })
        ));
        // In both cases the singleton must be populated afterwards.
        assert!(EDGAR_SINGLETON.get().is_some());
    }

    #[test]
    fn test_double_init_fails() {
        // The first call may succeed or fail depending on test order; the
        // second one can never succeed.
        let _ = init("first@example.com");
        let result = init("second@example.com");
        assert!(matches!(result, Err(FinanceError::InvalidParameter { .. })));
    }

    #[test]
    fn test_singleton_is_set_after_init() {
        let _ = init("test@example.com");
        assert!(EDGAR_SINGLETON.get().is_some());
    }

    #[test]
    fn test_accession_parts_dashed() {
        let Ok((cik_part, accession)) = accession_parts("0000320193-24-000123") else {
            panic!("a well-formed dashed accession number must be accepted");
        };
        assert_eq!(cik_part, "320193");
        assert_eq!(accession, "000032019324000123");
    }

    #[test]
    fn test_accession_parts_rejects_empty() {
        assert!(matches!(
            accession_parts(""),
            Err(FinanceError::InvalidParameter { .. })
        ));
        // A leading segment made up only of zeros leaves nothing to use.
        assert!(matches!(
            accession_parts("0000000000-24-000123"),
            Err(FinanceError::InvalidParameter { .. })
        ));
    }
}
349}