Skip to main content

finance_query/adapters/edgar/
mod.rs

1//! SEC EDGAR API client.
2//!
3//! Provides access to SEC EDGAR data including filing history,
4//! structured XBRL financial data, and full-text search.
5//!
6//! All requests are rate-limited to 10 per second as required by SEC.
7//! Rate limiting and CIK caching are managed via a process-global singleton.
8//!
9//! # Quick Start
10//!
11//! Initialize once at application startup, then use anywhere:
12//!
13//! ```no_run
14//! use finance_query::edgar;
15//!
16//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
17//! // Initialize once (required)
18//! edgar::init("user@example.com")?;
19//!
20//! // Use anywhere
21//! let cik = edgar::resolve_cik("AAPL").await?;
22//! let submissions = edgar::submissions(cik).await?;
23//! let facts = edgar::company_facts(cik).await?;
24//!
25//! // Search filings
26//! let results = edgar::search(
27//!     "artificial intelligence",
28//!     Some(&["10-K"]),
29//!     Some("2024-01-01"),
30//!     None,
31//!     None,
32//!     None,
33//! ).await?;
34//! # Ok(())
35//! # }
36//! ```
37
38mod client;
39mod endpoints;
40
41use crate::error::{FinanceError, Result};
42use crate::models::filings::{
43    CompanyFacts, EdgarFilingIndex, EdgarSearchResults, EdgarSubmissions,
44};
45use crate::rate_limiter::RateLimiter;
46use client::EdgarClientBuilder;
47use std::collections::HashMap;
48use std::sync::{Arc, OnceLock};
49use std::time::Duration;
50use tokio::sync::RwLock;
51
52/// SEC EDGAR rate limit: 10 requests per second.
53const EDGAR_RATE_PER_SEC: f64 = 10.0;
54
55/// Stable configuration stored in the EDGAR process-global singleton.
56///
57/// Only configuration, the rate limiter, and the CIK cache are stored — NOT
58/// the `reqwest::Client`. `reqwest::Client` internally spawns hyper
59/// connection-pool tasks on whichever tokio runtime first uses them; when that
60/// runtime is dropped (e.g. at the end of a `#[tokio::test]`), those tasks die
61/// and subsequent calls from a different runtime receive `DispatchGone`. A fresh
62/// `reqwest::Client` is built per public function call via
63/// [`EdgarClientBuilder::build_with_shared_state`], reusing the shared rate
64/// limiter and CIK cache.
65struct EdgarSingleton {
66    email: String,
67    app_name: String,
68    timeout: Duration,
69    rate_limiter: Arc<RateLimiter>,
70    cik_cache: Arc<RwLock<Option<HashMap<String, u64>>>>,
71}
72
73static EDGAR_SINGLETON: OnceLock<EdgarSingleton> = OnceLock::new();
74
75/// Initialize the global EDGAR client with a contact email.
76///
77/// This function must be called once before using any EDGAR functions.
78/// The SEC requires all automated requests to include a User-Agent header
79/// with a contact email address.
80///
81/// # Arguments
82///
83/// * `email` - Contact email address (included in User-Agent header)
84///
85/// # Example
86///
87/// ```no_run
88/// use finance_query::edgar;
89///
90/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
91/// edgar::init("user@example.com")?;
92/// # Ok(())
93/// # }
94/// ```
95///
96/// # Errors
97///
98/// Returns an error if EDGAR has already been initialized.
99pub fn init(email: impl Into<String>) -> Result<()> {
100    init_with_config(email, "finance-query", Duration::from_secs(30))
101}
102
103/// Initialize the global EDGAR client with full configuration.
104///
105/// Use this for custom app name and timeout settings.
106///
107/// # Arguments
108///
109/// * `email` - Contact email address (required by SEC)
110/// * `app_name` - Application name (included in User-Agent)
111/// * `timeout` - HTTP request timeout duration
112///
113/// # Example
114///
115/// ```no_run
116/// use finance_query::edgar;
117/// use std::time::Duration;
118///
119/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
120/// edgar::init_with_config(
121///     "user@example.com",
122///     "my-app",
123///     Duration::from_secs(60),
124/// )?;
125/// # Ok(())
126/// # }
127/// ```
128pub fn init_with_config(
129    email: impl Into<String>,
130    app_name: impl Into<String>,
131    timeout: Duration,
132) -> Result<()> {
133    EDGAR_SINGLETON
134        .set(EdgarSingleton {
135            email: email.into(),
136            app_name: app_name.into(),
137            timeout,
138            rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
139            cik_cache: Arc::new(RwLock::new(None)),
140        })
141        .map_err(|_| FinanceError::InvalidParameter {
142            param: "edgar".to_string(),
143            reason: "EDGAR client already initialized".to_string(),
144        })
145}
146
147/// Build a fresh [`EdgarClient`](client::EdgarClient) from the singleton's
148/// config, reusing the shared rate limiter and CIK cache.
149///
150/// If EDGAR hasn't been explicitly initialized, falls back to the `EDGAR_EMAIL`
151/// environment variable as a convenience (consistent with other adapters).
152fn build_client() -> Result<client::EdgarClient> {
153    if EDGAR_SINGLETON.get().is_none()
154        && let Ok(email) = std::env::var("EDGAR_EMAIL")
155    {
156        let _ = EDGAR_SINGLETON.set(EdgarSingleton {
157            email,
158            app_name: "finance-query".to_string(),
159            timeout: Duration::from_secs(30),
160            rate_limiter: Arc::new(RateLimiter::new(EDGAR_RATE_PER_SEC)),
161            cik_cache: Arc::new(RwLock::new(None)),
162        });
163    }
164    let s = EDGAR_SINGLETON
165        .get()
166        .ok_or_else(|| FinanceError::InvalidParameter {
167            param: "edgar".to_string(),
168            reason: "EDGAR_EMAIL not set. Call edgar::init(email) or set EDGAR_EMAIL env var."
169                .to_string(),
170        })?;
171    EdgarClientBuilder::new(&s.email)
172        .app_name(&s.app_name)
173        .timeout(s.timeout)
174        .build_with_shared_state(Arc::clone(&s.rate_limiter), Arc::clone(&s.cik_cache))
175}
176
177fn accession_parts(accession_number: &str) -> Result<(String, String)> {
178    let cik_part = accession_number
179        .split('-')
180        .next()
181        .unwrap_or("")
182        .trim_start_matches('0')
183        .to_string();
184    let accession_no_dashes = accession_number.replace('-', "");
185
186    if cik_part.is_empty() || accession_no_dashes.is_empty() {
187        return Err(FinanceError::InvalidParameter {
188            param: "accession_number".to_string(),
189            reason: "Invalid accession number format".to_string(),
190        });
191    }
192
193    Ok((cik_part, accession_no_dashes))
194}
195
196/// Resolve a ticker symbol to its SEC CIK number.
197///
198/// The ticker-to-CIK mapping is fetched once and cached process-wide.
199/// Lookups are case-insensitive.
200///
201/// # Example
202///
203/// ```no_run
204/// use finance_query::edgar;
205///
206/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
207/// edgar::init("user@example.com")?;
208/// let cik = edgar::resolve_cik("AAPL").await?;
209/// assert_eq!(cik, 320193);
210/// # Ok(())
211/// # }
212/// ```
213///
214/// # Errors
215///
216/// Returns an error if:
217/// - EDGAR has not been initialized (call `init()` first)
218/// - Symbol not found in SEC database
219/// - Network request fails
220pub async fn resolve_cik(symbol: &str) -> Result<u64> {
221    build_client()?.resolve_cik(symbol).await
222}
223
224/// Fetch filing history and company metadata for a CIK.
225///
226/// Returns the most recent ~1000 filings inline, with references to
227/// additional history files for older filings.
228///
229/// # Example
230///
231/// ```no_run
232/// use finance_query::edgar;
233///
234/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
235/// edgar::init("user@example.com")?;
236/// let cik = edgar::resolve_cik("AAPL").await?;
237/// let submissions = edgar::submissions(cik).await?;
238/// println!("Company: {:?}", submissions.name);
239/// # Ok(())
240/// # }
241/// ```
242pub async fn submissions(cik: u64) -> Result<EdgarSubmissions> {
243    build_client()?.submissions(cik).await
244}
245
246/// Fetch structured XBRL financial data for a CIK.
247///
248/// Returns all extracted XBRL facts organized by taxonomy (us-gaap, ifrs, dei).
249/// This can be a large response (several MB for major companies).
250///
251/// # Example
252///
253/// ```no_run
254/// use finance_query::edgar;
255///
256/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
257/// edgar::init("user@example.com")?;
258/// let cik = edgar::resolve_cik("AAPL").await?;
259/// let facts = edgar::company_facts(cik).await?;
260/// println!("Entity: {:?}", facts.entity_name);
261/// # Ok(())
262/// # }
263/// ```
264pub async fn company_facts(cik: u64) -> Result<CompanyFacts> {
265    build_client()?.company_facts(cik).await
266}
267
268/// Fetch the filing index for a specific accession number.
269///
270/// This provides the file list for a filing, which can be used to locate
271/// the primary HTML document and file sizes.
272///
273/// # Example
274///
275/// ```no_run
276/// use finance_query::edgar;
277///
278/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
279/// edgar::init("user@example.com")?;
280/// let index = edgar::filing_index("0000320193-24-000123").await?;
281/// println!("Files: {}", index.directory.item.len());
282/// # Ok(())
283/// # }
284/// ```
285pub async fn filing_index(accession_number: &str) -> Result<EdgarFilingIndex> {
286    build_client()?.filing_index(accession_number).await
287}
288
289/// Search SEC EDGAR filings by text content.
290///
291/// # Arguments
292///
293/// * `query` - Search term or phrase
294/// * `forms` - Optional form type filter (e.g., `&["10-K", "10-Q"]`)
295/// * `start_date` - Optional start date (YYYY-MM-DD)
296/// * `end_date` - Optional end date (YYYY-MM-DD)
297/// * `from` - Optional pagination offset (default: 0)
298/// * `size` - Optional page size (default: 100, max: 100)
299///
300/// # Example
301///
302/// ```no_run
303/// use finance_query::edgar;
304///
305/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
306/// edgar::init("user@example.com")?;
307/// let results = edgar::search(
308///     "artificial intelligence",
309///     Some(&["10-K"]),
310///     Some("2024-01-01"),
311///     None,
312///     Some(0),
313///     Some(100),
314/// ).await?;
315/// if let Some(hits_container) = &results.hits {
316///     println!("Found {} results", hits_container.total.as_ref().and_then(|t| t.value).unwrap_or(0));
317/// }
318/// # Ok(())
319/// # }
320/// ```
321pub async fn search(
322    query: &str,
323    forms: Option<&[&str]>,
324    start_date: Option<&str>,
325    end_date: Option<&str>,
326    from: Option<usize>,
327    size: Option<usize>,
328) -> Result<EdgarSearchResults> {
329    build_client()?
330        .search(query, forms, start_date, end_date, from, size)
331        .await
332}
333
334// ============================================================================
335// Canonical model conversion functions
336// ============================================================================
337
338/// Fetch canonical ProviderFilings for a ticker symbol.
339pub async fn fetch_filings_response(
340    symbol: &str,
341) -> Result<crate::models::filings::ProviderFilings> {
342    use crate::models::filings::{ProviderFiling, ProviderFilings};
343
344    let cik_num = resolve_cik(symbol).await?;
345    let subs = submissions(cik_num).await?;
346
347    let cik = subs.cik.clone().unwrap_or_default();
348    let company_name = subs.name.clone();
349    let filings = subs
350        .filings
351        .and_then(|f| f.recent)
352        .map(|r| r.to_filings())
353        .unwrap_or_default()
354        .into_iter()
355        .map(|f| {
356            let accession_no_dashes = f.accession_number.replace('-', "");
357            let url = if !cik.is_empty()
358                && !accession_no_dashes.is_empty()
359                && !f.primary_document.is_empty()
360            {
361                Some(format!(
362                    "https://www.sec.gov/Archives/edgar/data/{}/{}/{}",
363                    cik.trim_start_matches('0'),
364                    accession_no_dashes,
365                    f.primary_document
366                ))
367            } else {
368                None
369            };
370            ProviderFiling {
371                accession_number: Some(f.accession_number),
372                filing_date: Some(f.filing_date),
373                filing_type: Some(f.form),
374                filing_url: url,
375                company_name: company_name.clone(),
376                cik: Some(cik.clone()),
377            }
378        })
379        .collect();
380
381    Ok(ProviderFilings {
382        symbol: symbol.to_string(),
383        filings,
384    })
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    // The singleton is process-global and the test harness may run these
    // tests concurrently and in any order, so each test has to tolerate
    // another one having initialized EDGAR first.

    /// After `init` returns, the singleton is populated; if this call was the
    /// one that won, the stored configuration matches what was passed in.
    #[test]
    fn test_init_sets_singleton() {
        let result = init("test@example.com");
        let stored = EDGAR_SINGLETON
            .get()
            .expect("singleton must be populated once init has returned");

        match result {
            Ok(()) => {
                assert_eq!(stored.email, "test@example.com");
                assert_eq!(stored.app_name, "finance-query");
                assert_eq!(stored.timeout, Duration::from_secs(30));
            }
            // Someone else initialized first; the only acceptable failure is
            // the "already initialized" error.
            Err(err) => assert!(matches!(err, FinanceError::InvalidParameter { .. })),
        }
    }

    /// A second `init` is always rejected and never overwrites the stored
    /// configuration.
    #[test]
    fn test_double_init_fails() {
        let _ = init("first@example.com");
        let result = init("second@example.com");
        assert!(matches!(result, Err(FinanceError::InvalidParameter { .. })));

        let stored = EDGAR_SINGLETON
            .get()
            .expect("singleton must be populated once init has returned");
        assert_ne!(stored.email, "second@example.com");
    }

    /// Whether or not this particular call wins, the singleton exists afterwards.
    #[test]
    fn test_singleton_is_set_after_init() {
        let _ = init("test@example.com");
        assert!(EDGAR_SINGLETON.get().is_some());
    }
}