Skip to main content

finance_query/models/edgar/
search.rs

//! EDGAR Full-Text Search (EFTS) models.
//!
//! Models for results from the SEC EDGAR full-text search API at
//! `https://efts.sec.gov/LATEST/search-index`.
5
6use serde::{Deserialize, Serialize};
7
8/// Full-text search results from SEC EDGAR.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10#[non_exhaustive]
11pub struct EdgarSearchResults {
12    /// The search query that was executed (Elasticsearch query DSL, stored as raw JSON)
13    #[serde(default)]
14    pub query: Option<serde_json::Value>,
15
16    /// Nested hits container
17    #[serde(default)]
18    pub hits: Option<EdgarSearchHitsContainer>,
19}
20
#[cfg(feature = "dataframe")]
impl EdgarSearchResults {
    /// Build a polars DataFrame from the filings carried by these results.
    ///
    /// Each hit that has a `_source` payload contributes one cloned
    /// [`EdgarSearchSource`]; hits without a payload are skipped. When the
    /// hits container itself is missing, the conversion runs on an empty list.
    ///
    /// # Errors
    ///
    /// Returns whatever error `EdgarSearchSource::vec_to_dataframe` produces.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # #[cfg(feature = "dataframe")]
    /// # use finance_query::edgar;
    /// # #[cfg(feature = "dataframe")]
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # edgar::init("user@example.com")?;
    /// let results = edgar::search("revenue", Some(&["10-K"]), None, None, None, Some(100)).await?;
    /// let df = results.to_dataframe()?;
    /// println!("Search results DataFrame: {:?}", df);
    /// # Ok(())
    /// # }
    /// ```
    pub fn to_dataframe(&self) -> ::polars::prelude::PolarsResult<::polars::prelude::DataFrame> {
        // `Option::iter` yields zero or one container, so a missing container
        // simply results in an empty list of sources.
        let sources: Vec<EdgarSearchSource> = self
            .hits
            .iter()
            .flat_map(|container| container.hits.iter())
            .filter_map(|hit| hit._source.clone())
            .collect();

        EdgarSearchSource::vec_to_dataframe(&sources)
    }
}
52
53/// Container for search hits with metadata.
54#[derive(Debug, Clone, Serialize, Deserialize)]
55#[non_exhaustive]
56pub struct EdgarSearchHitsContainer {
57    /// Total number of matching results
58    #[serde(default)]
59    pub total: Option<EdgarSearchTotal>,
60
61    /// Maximum score
62    #[serde(default)]
63    pub max_score: Option<f64>,
64
65    /// Search result hits
66    #[serde(default)]
67    pub hits: Vec<EdgarSearchHit>,
68}
69
70/// Total count information for search results.
71#[derive(Debug, Clone, Serialize, Deserialize)]
72#[non_exhaustive]
73pub struct EdgarSearchTotal {
74    /// Total number of matching documents
75    #[serde(default)]
76    pub value: Option<u64>,
77
78    /// Relation to the actual total (e.g., "eq" for exact, "gte" for 10000+)
79    #[serde(default)]
80    pub relation: Option<String>,
81}
82
83/// A single search result hit from EDGAR full-text search (Elasticsearch format).
84#[derive(Debug, Clone, Serialize, Deserialize)]
85#[non_exhaustive]
86pub struct EdgarSearchHit {
87    /// Elasticsearch index name
88    #[serde(default)]
89    pub _index: Option<String>,
90
91    /// Hit ID
92    #[serde(default)]
93    pub _id: Option<String>,
94
95    /// Relevance score
96    #[serde(default)]
97    pub _score: Option<f64>,
98
99    /// The actual filing data
100    #[serde(default)]
101    pub _source: Option<EdgarSearchSource>,
102}
103
104/// Source data for a search hit containing the actual filing information.
105#[derive(Debug, Clone, Serialize, Deserialize)]
106#[cfg_attr(feature = "dataframe", derive(crate::ToDataFrame))]
107#[non_exhaustive]
108pub struct EdgarSearchSource {
109    /// CIK numbers (as strings)
110    #[serde(default)]
111    pub ciks: Vec<String>,
112
113    /// Filing date (YYYY-MM-DD)
114    #[serde(default)]
115    pub file_date: Option<String>,
116
117    /// Form type (e.g., "10-K", "10-Q", "8-K")
118    #[serde(default)]
119    pub form: Option<String>,
120
121    /// Accession number (EDGAR document ID)
122    #[serde(default)]
123    pub adsh: Option<String>,
124
125    /// Display names (company name with ticker)
126    #[serde(default)]
127    pub display_names: Vec<String>,
128
129    /// Period ending date
130    #[serde(default)]
131    pub period_ending: Option<String>,
132
133    /// Root form types
134    #[serde(default)]
135    pub root_forms: Vec<String>,
136
137    /// Standard Industrial Classification codes
138    #[serde(default)]
139    pub sics: Vec<String>,
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting a single-hit result set yields a one-row DataFrame that
    /// exposes the `form` and `file_date` columns.
    #[cfg(feature = "dataframe")]
    #[test]
    fn test_search_results_dataframe_conversion() {
        // Assemble the fixture from the innermost value outwards.
        let source = EdgarSearchSource {
            ciks: vec!["320193".to_string()],
            file_date: Some("2024-11-01".to_string()),
            form: Some("10-K".to_string()),
            adsh: Some("0000320193-24-000123".to_string()),
            display_names: vec!["Apple Inc. (AAPL)".to_string()],
            period_ending: Some("2024-09-28".to_string()),
            root_forms: vec!["10-K".to_string()],
            sics: vec!["3571".to_string()],
        };
        let hit = EdgarSearchHit {
            _index: Some("edgar-filings".to_string()),
            _id: Some("1".to_string()),
            _score: Some(1.5),
            _source: Some(source),
        };
        let container = EdgarSearchHitsContainer {
            total: Some(EdgarSearchTotal {
                value: Some(1),
                relation: Some("eq".to_string()),
            }),
            max_score: Some(1.5),
            hits: vec![hit],
        };
        let results = EdgarSearchResults {
            query: Some(serde_json::json!({"query": {"match": {"doc_text": "test"}}})),
            hits: Some(container),
        };

        let df = results.to_dataframe().unwrap();
        assert_eq!(df.height(), 1);

        let col_names = df.get_column_names_owned();
        for expected in ["form", "file_date"] {
            assert!(col_names.iter().any(|n| n.as_str() == expected));
        }
    }

    /// A representative JSON payload deserializes into the nested model types.
    #[test]
    fn test_deserialize_search_results() {
        let payload = r#"{
            "query": {"query": {"match": {"doc_text": "test"}}},
            "hits": {
                "total": {
                    "value": 10000,
                    "relation": "gte"
                },
                "max_score": 1.5,
                "hits": [
                    {
                        "_index": "edgar-filings",
                        "_id": "1",
                        "_score": 1.5,
                        "_source": {
                            "ciks": ["320193"],
                            "file_date": "2024-11-01",
                            "form": "10-K",
                            "adsh": "0000320193-24-000123",
                            "display_names": ["Apple Inc. (AAPL)"],
                            "period_ending": "2024-09-28",
                            "root_forms": ["10-K"],
                            "sics": ["3571"]
                        }
                    }
                ]
            }
        }"#;

        let parsed: EdgarSearchResults = serde_json::from_str(payload).unwrap();
        assert!(parsed.query.is_some());

        // Container-level metadata.
        let container = parsed.hits.as_ref().unwrap();
        let total = container.total.as_ref().unwrap();
        assert_eq!(total.value, Some(10000));
        assert_eq!(container.hits.len(), 1);

        // Filing data of the only hit.
        let filing = container.hits[0]._source.as_ref().unwrap();
        assert_eq!(filing.ciks, vec!["320193"]);
        assert_eq!(filing.form.as_deref(), Some("10-K"));
        assert!(!filing.display_names.is_empty());
    }
}