Skip to main content

finance_query/models/edgar/
submissions.rs

1//! EDGAR Submissions API models.
2//!
3//! Models for the filing history and company metadata from
4//! `https://data.sec.gov/submissions/CIK{padded}.json`.
5
6use serde::{Deserialize, Deserializer, Serialize};
7
8/// Deserialize empty strings as None
9fn deserialize_empty_string_as_none<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
10where
11    D: Deserializer<'de>,
12{
13    let s = Option::<String>::deserialize(deserializer)?;
14    Ok(s.filter(|s| !s.is_empty()))
15}
16
17/// Deserialize Vec<String>, filtering out empty strings
18fn deserialize_vec_string_filter_empty<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
19where
20    D: Deserializer<'de>,
21{
22    let vec = Vec::<String>::deserialize(deserializer)?;
23    Ok(vec.into_iter().filter(|s| !s.is_empty()).collect())
24}
25
26/// Full submissions response for a company from SEC EDGAR.
27///
28/// Contains company metadata and filing history. The `filings` field holds
29/// the most recent ~1000 filings inline, with links to older history files.
30#[derive(Debug, Clone, Serialize, Deserialize)]
31#[serde(rename_all = "camelCase")]
32#[non_exhaustive]
33pub struct EdgarSubmissions {
34    /// CIK number (as string)
35    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
36    pub cik: Option<String>,
37
38    /// Company name
39    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
40    pub name: Option<String>,
41
42    /// Entity type (e.g., "operating")
43    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
44    pub entity_type: Option<String>,
45
46    /// Standard Industrial Classification code
47    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
48    pub sic: Option<String>,
49
50    /// SIC description
51    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
52    pub sic_description: Option<String>,
53
54    /// Ticker symbols associated with this entity
55    #[serde(default)]
56    pub tickers: Vec<String>,
57
58    /// Stock exchanges
59    #[serde(default)]
60    pub exchanges: Vec<String>,
61
62    /// State of incorporation
63    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
64    pub state_of_incorporation: Option<String>,
65
66    /// Fiscal year end (MMDD format, e.g., "0930" for September 30)
67    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
68    pub fiscal_year_end: Option<String>,
69
70    /// Employer Identification Number
71    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
72    pub ein: Option<String>,
73
74    /// Company website (often empty in SEC data)
75    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
76    pub website: Option<String>,
77
78    /// Filer category (e.g., "Large accelerated filer")
79    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
80    pub category: Option<String>,
81
82    /// Whether insider transaction data exists for this entity as owner (0 or 1)
83    #[serde(default)]
84    pub insider_transaction_for_owner_exists: Option<u8>,
85
86    /// Whether insider transaction data exists for this entity as issuer (0 or 1)
87    #[serde(default)]
88    pub insider_transaction_for_issuer_exists: Option<u8>,
89
90    /// Filing history
91    #[serde(default)]
92    pub filings: Option<EdgarFilings>,
93}
94
95/// Container for recent filings and links to older filing history files.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97#[serde(rename_all = "camelCase")]
98#[non_exhaustive]
99pub struct EdgarFilings {
100    /// Recent filings (up to ~1000, inline in the response)
101    #[serde(default)]
102    pub recent: Option<EdgarFilingRecent>,
103
104    /// Links to additional filing history JSON files
105    #[serde(default)]
106    pub files: Vec<EdgarFilingFile>,
107}
108
109/// Reference to an additional filing history file for older filings.
110#[derive(Debug, Clone, Serialize, Deserialize)]
111#[serde(rename_all = "camelCase")]
112#[non_exhaustive]
113pub struct EdgarFilingFile {
114    /// Filename of the additional filings JSON (relative to submissions URL)
115    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
116    pub name: Option<String>,
117
118    /// Number of filings in this file
119    #[serde(default)]
120    pub filing_count: Option<u32>,
121
122    /// Earliest filing date in this file
123    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
124    pub filing_from: Option<String>,
125
126    /// Latest filing date in this file
127    #[serde(default, deserialize_with = "deserialize_empty_string_as_none")]
128    pub filing_to: Option<String>,
129}
130
131/// Recent filings data stored as parallel arrays.
132///
133/// EDGAR returns filing data as parallel arrays (each field is a `Vec` of the same length).
134/// Use [`to_filings()`](EdgarFilingRecent::to_filings) to convert to a `Vec<EdgarFiling>`.
135#[derive(Debug, Clone, Serialize, Deserialize)]
136#[serde(rename_all = "camelCase")]
137#[non_exhaustive]
138pub struct EdgarFilingRecent {
139    /// Accession numbers (unique filing identifiers)
140    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
141    pub accession_number: Vec<String>,
142
143    /// Filing dates (YYYY-MM-DD)
144    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
145    pub filing_date: Vec<String>,
146
147    /// Report dates (YYYY-MM-DD, may be empty for some form types)
148    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
149    pub report_date: Vec<String>,
150
151    /// Acceptance date-times
152    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
153    pub acceptance_date_time: Vec<String>,
154
155    /// Form types (10-K, 10-Q, 8-K, etc.)
156    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
157    pub form: Vec<String>,
158
159    /// Filing sizes in bytes
160    #[serde(default)]
161    pub size: Vec<u64>,
162
163    /// Whether the filing is XBRL
164    #[serde(default, rename = "isXBRL")]
165    pub is_xbrl: Vec<u8>,
166
167    /// Whether the filing is Inline XBRL
168    #[serde(default, rename = "isInlineXBRL")]
169    pub is_inline_xbrl: Vec<u8>,
170
171    /// Primary document filenames
172    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
173    pub primary_document: Vec<String>,
174
175    /// Primary document descriptions
176    #[serde(default, deserialize_with = "deserialize_vec_string_filter_empty")]
177    pub primary_doc_description: Vec<String>,
178}
179
180impl EdgarFilingRecent {
181    /// Convert parallel arrays into a vector of individual filings.
182    ///
183    /// # Example
184    ///
185    /// ```no_run
186    /// # use finance_query::EdgarSubmissions;
187    /// # fn example(submissions: EdgarSubmissions) {
188    /// if let Some(filings) = &submissions.filings {
189    ///     if let Some(recent) = &filings.recent {
190    ///         for filing in recent.to_filings() {
191    ///             println!("{}: {} ({})", filing.filing_date, filing.form, filing.primary_doc_description);
192    ///         }
193    ///     }
194    /// }
195    /// # }
196    /// ```
197    pub fn to_filings(&self) -> Vec<EdgarFiling> {
198        let len = self.accession_number.len();
199        (0..len)
200            .map(|i| EdgarFiling {
201                accession_number: self.accession_number.get(i).cloned().unwrap_or_default(),
202                filing_date: self.filing_date.get(i).cloned().unwrap_or_default(),
203                report_date: self.report_date.get(i).cloned().unwrap_or_default(),
204                acceptance_date_time: self
205                    .acceptance_date_time
206                    .get(i)
207                    .cloned()
208                    .unwrap_or_default(),
209                form: self.form.get(i).cloned().unwrap_or_default(),
210                size: self.size.get(i).copied().unwrap_or(0),
211                is_xbrl: self.is_xbrl.get(i).copied().unwrap_or(0) != 0,
212                is_inline_xbrl: self.is_inline_xbrl.get(i).copied().unwrap_or(0) != 0,
213                primary_document: self.primary_document.get(i).cloned().unwrap_or_default(),
214                primary_doc_description: self
215                    .primary_doc_description
216                    .get(i)
217                    .cloned()
218                    .unwrap_or_default(),
219            })
220            .collect()
221    }
222
223    /// Convert filings to a polars DataFrame (requires `dataframe` feature).
224    ///
225    /// # Example
226    ///
227    /// ```no_run
228    /// # #[cfg(feature = "dataframe")]
229    /// # use finance_query::EdgarSubmissions;
230    /// # #[cfg(feature = "dataframe")]
231    /// # fn example(submissions: EdgarSubmissions) -> Result<(), Box<dyn std::error::Error>> {
232    /// if let Some(filings) = &submissions.filings {
233    ///     if let Some(recent) = &filings.recent {
234    ///         let df = recent.to_dataframe()?;
235    ///         println!("Filings DataFrame: {:?}", df);
236    ///     }
237    /// }
238    /// # Ok(())
239    /// # }
240    /// ```
241    #[cfg(feature = "dataframe")]
242    pub fn to_dataframe(&self) -> ::polars::prelude::PolarsResult<::polars::prelude::DataFrame> {
243        let filings = self.to_filings();
244        EdgarFiling::vec_to_dataframe(&filings)
245    }
246}
247
248/// A single SEC filing with metadata.
249///
250/// Derived from the parallel arrays in [`EdgarFilingRecent`] via
251/// [`to_filings()`](EdgarFilingRecent::to_filings).
252#[derive(Debug, Clone, Serialize, Deserialize)]
253#[cfg_attr(feature = "dataframe", derive(crate::ToDataFrame))]
254#[non_exhaustive]
255pub struct EdgarFiling {
256    /// Accession number (unique filing identifier, e.g., "0000320193-24-000123")
257    pub accession_number: String,
258    /// Filing date (YYYY-MM-DD)
259    pub filing_date: String,
260    /// Report date (YYYY-MM-DD)
261    pub report_date: String,
262    /// Acceptance date-time
263    pub acceptance_date_time: String,
264    /// Form type (e.g., "10-K", "10-Q", "8-K")
265    pub form: String,
266    /// Filing size in bytes
267    pub size: u64,
268    /// Whether the filing contains XBRL data
269    pub is_xbrl: bool,
270    /// Whether the filing contains Inline XBRL data
271    pub is_inline_xbrl: bool,
272    /// Primary document filename
273    pub primary_document: String,
274    /// Primary document description
275    pub primary_doc_description: String,
276}
277
278impl EdgarFiling {
279    /// Create a new EdgarFiling instance.
280    ///
281    /// This constructor is provided to support creating EdgarFiling instances outside
282    /// the library crate, since the struct is marked `#[non_exhaustive]`.
283    #[allow(clippy::too_many_arguments)]
284    pub fn new(
285        accession_number: String,
286        filing_date: String,
287        report_date: String,
288        acceptance_date_time: String,
289        form: String,
290        size: u64,
291        is_xbrl: bool,
292        is_inline_xbrl: bool,
293        primary_document: String,
294        primary_doc_description: String,
295    ) -> Self {
296        Self {
297            accession_number,
298            filing_date,
299            report_date,
300            acceptance_date_time,
301            form,
302            size,
303            is_xbrl,
304            is_inline_xbrl,
305            primary_document,
306            primary_doc_description,
307        }
308    }
309
310    /// Get the URL to view this filing on SEC EDGAR.
311    ///
312    /// # Example
313    ///
314    /// ```no_run
315    /// use finance_query::edgar;
316    ///
317    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
318    /// edgar::init("user@example.com")?;
319    /// let cik = edgar::resolve_cik("AAPL").await?;
320    /// let submissions = edgar::submissions(cik).await?;
321    ///
322    /// if let Some(filings) = &submissions.filings {
323    ///     if let Some(recent) = &filings.recent {
324    ///         for filing in recent.to_filings() {
325    ///             let url = filing.edgar_url();
326    ///             println!("Filing URL: {}", url);
327    ///         }
328    ///     }
329    /// }
330    /// # Ok(())
331    /// # }
332    /// ```
333    pub fn edgar_url(&self) -> String {
334        // Accession number format: CIK-YY-SEQUENCE (e.g., "0000320193-24-000123")
335        // Extract CIK (digits before first dash) and remove all dashes for URL path
336        let cik = self
337            .accession_number
338            .split('-')
339            .next()
340            .unwrap_or("")
341            .trim_start_matches('0'); // Remove leading zeros for URL
342        let accession_no_dashes = self.accession_number.replace('-', "");
343
344        if self.primary_document.is_empty() {
345            // For search results without primary_document, link to the filing directory
346            // SEC will show the file listing where users can click the report
347            format!(
348                "https://www.sec.gov/cgi-bin/viewer?action=view&cik={}&accession_number={}&xbrl_type=v",
349                cik, accession_no_dashes
350            )
351        } else {
352            // For submissions with primary_document, link directly to document
353            format!(
354                "https://www.sec.gov/Archives/edgar/data/{}/{}/{}",
355                cik, accession_no_dashes, self.primary_document
356            )
357        }
358    }
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an XBRL 10-K filing fixture with an empty acceptance date-time;
    /// only the values that differ between the URL tests are parameters.
    fn ten_k_filing(
        accession_number: &str,
        filing_date: &str,
        report_date: &str,
        size: u64,
        primary_document: &str,
    ) -> EdgarFiling {
        EdgarFiling::new(
            accession_number.to_string(),
            filing_date.to_string(),
            report_date.to_string(),
            String::new(),
            "10-K".to_string(),
            size,
            true,
            true,
            primary_document.to_string(),
            "10-K".to_string(),
        )
    }

    /// A single-row `EdgarFilingRecent` converts to a one-row DataFrame that
    /// exposes the expected column names.
    #[test]
    #[cfg(feature = "dataframe")]
    fn test_edgar_filing_dataframe_conversion() {
        let single = |value: &str| vec![value.to_string()];
        let recent = EdgarFilingRecent {
            accession_number: single("0000320193-24-000123"),
            filing_date: single("2024-11-01"),
            report_date: single("2024-09-28"),
            acceptance_date_time: single("2024-11-01T16:30:00.000Z"),
            form: single("10-K"),
            size: vec![15_000_000],
            is_xbrl: vec![1],
            is_inline_xbrl: vec![1],
            primary_document: single("aapl-20240928.htm"),
            primary_doc_description: single("10-K"),
        };

        let frame = recent.to_dataframe().unwrap();
        assert_eq!(frame.height(), 1);

        let columns = frame.get_column_names_owned();
        for expected in ["accession_number", "filing_date", "form"] {
            assert!(columns.iter().any(|name| name.as_str() == expected));
        }
    }

    /// A representative submissions payload deserializes and converts into
    /// individual filings.
    #[test]
    fn test_deserialize_submissions() {
        let json = r#"{
            "cik": "0000320193",
            "entityType": "operating",
            "sic": "3571",
            "sicDescription": "Electronic Computers",
            "name": "Apple Inc.",
            "tickers": ["AAPL"],
            "exchanges": ["Nasdaq"],
            "stateOfIncorporation": "CA",
            "fiscalYearEnd": "0930",
            "website": "https://www.apple.com",
            "category": "Large accelerated filer",
            "filings": {
                "recent": {
                    "accessionNumber": ["0000320193-24-000123", "0000320193-24-000100"],
                    "filingDate": ["2024-11-01", "2024-08-02"],
                    "reportDate": ["2024-09-28", "2024-06-29"],
                    "acceptanceDateTime": ["2024-11-01T16:30:00.000Z", "2024-08-02T16:15:00.000Z"],
                    "form": ["10-K", "10-Q"],
                    "size": [15000000, 8000000],
                    "isXBRL": [1, 1],
                    "isInlineXBRL": [1, 1],
                    "primaryDocument": ["aapl-20240928.htm", "aapl-20240629.htm"],
                    "primaryDocDescription": ["10-K", "10-Q"]
                },
                "files": []
            }
        }"#;

        let parsed: EdgarSubmissions = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.name.as_deref(), Some("Apple Inc."));
        assert_eq!(parsed.tickers, vec!["AAPL"]);
        assert_eq!(parsed.sic.as_deref(), Some("3571"));

        let recent = parsed.filings.unwrap().recent.unwrap();
        assert_eq!(recent.accession_number.len(), 2);

        let rows = recent.to_filings();
        assert_eq!(rows.len(), 2);
        let forms: Vec<&str> = rows.iter().map(|row| row.form.as_str()).collect();
        assert_eq!(forms, ["10-K", "10-Q"]);
        assert!(rows[0].is_xbrl);
    }

    /// Empty strings become `None` for scalar fields and are dropped from
    /// string lists; `None` serializes back to JSON `null`.
    #[test]
    fn test_empty_string_deserialization() {
        let json = r#"{
            "cik": "0000320193",
            "name": "Test Company",
            "website": "",
            "ein": "",
            "tickers": [],
            "exchanges": [],
            "filings": {
                "recent": {
                    "accessionNumber": ["123"],
                    "filingDate": ["2024-01-01"],
                    "reportDate": [""],
                    "acceptanceDateTime": [""],
                    "form": ["4"],
                    "size": [100],
                    "isXBRL": [0],
                    "isInlineXBRL": [0],
                    "primaryDocument": ["doc.xml"],
                    "primaryDocDescription": [""]
                }
            }
        }"#;

        let parsed: EdgarSubmissions = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.name.as_deref(), Some("Test Company"));
        // Scalar empty strings are read as None.
        assert!(parsed.website.is_none());
        assert!(parsed.ein.is_none());

        let recent = parsed
            .filings
            .as_ref()
            .and_then(|filings| filings.recent.as_ref())
            .unwrap();
        // Non-empty entries survive; empty entries are removed from the lists.
        assert_eq!(recent.accession_number, vec!["123"]);
        assert!(recent.report_date.is_empty());
        assert!(recent.acceptance_date_time.is_empty());
        assert!(recent.primary_doc_description.is_empty());

        // Round-trip: None is written back as JSON null.
        let round_trip = serde_json::to_value(&parsed).unwrap();
        assert!(round_trip["website"].is_null());
        assert!(round_trip["ein"].is_null());
    }

    /// The document URL uses the unpadded accession prefix and the dash-free
    /// accession number.
    #[test]
    fn test_edgar_filing_url() {
        let filing = ten_k_filing(
            "0000320193-25-000079",
            "2025-10-30",
            "2025-09-27",
            15_000_000,
            "aapl-20250927.htm",
        );

        let url = filing.edgar_url();
        assert_eq!(
            url,
            "https://www.sec.gov/Archives/edgar/data/320193/000032019325000079/aapl-20250927.htm"
        );

        // Printed so the URL can be checked manually in a browser.
        println!("Apple 10-K URL: {}", url);
    }

    /// Same URL shape for an accession number with a different prefix.
    #[test]
    fn test_edgar_filing_url_with_different_cik() {
        let filing = ten_k_filing(
            "0000950170-25-100235",
            "2025-08-01",
            "2025-06-30",
            8_000_000,
            "msft-20250630.htm",
        );

        let url = filing.edgar_url();
        assert_eq!(
            url,
            "https://www.sec.gov/Archives/edgar/data/950170/000095017025100235/msft-20250630.htm"
        );

        // Printed so the URL can be checked manually in a browser.
        println!("Microsoft 10-K URL: {}", url);
    }
}