finance_query/models/edgar/
search.rs1use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10#[non_exhaustive]
11pub struct EdgarSearchResults {
12 #[serde(default)]
14 pub query: Option<serde_json::Value>,
15
16 #[serde(default)]
18 pub hits: Option<EdgarSearchHitsContainer>,
19}
20
#[cfg(feature = "dataframe")]
impl EdgarSearchResults {
    /// Builds a polars `DataFrame` from the `_source` payloads of the hits.
    ///
    /// Hits whose `_source` is `None` are skipped. When `hits` itself is
    /// `None`, an empty slice is handed to
    /// `EdgarSearchSource::vec_to_dataframe`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `EdgarSearchSource::vec_to_dataframe` reports.
    pub fn to_dataframe(&self) -> ::polars::prelude::PolarsResult<::polars::prelude::DataFrame> {
        // `Option::iter` yields zero or one container, so a missing `hits`
        // key naturally produces an empty row list.
        let rows: Vec<EdgarSearchSource> = self
            .hits
            .iter()
            .flat_map(|container| &container.hits)
            .filter_map(|hit| hit._source.clone())
            .collect();

        EdgarSearchSource::vec_to_dataframe(&rows)
    }
}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
55#[non_exhaustive]
56pub struct EdgarSearchHitsContainer {
57 #[serde(default)]
59 pub total: Option<EdgarSearchTotal>,
60
61 #[serde(default)]
63 pub max_score: Option<f64>,
64
65 #[serde(default)]
67 pub hits: Vec<EdgarSearchHit>,
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
72#[non_exhaustive]
73pub struct EdgarSearchTotal {
74 #[serde(default)]
76 pub value: Option<u64>,
77
78 #[serde(default)]
80 pub relation: Option<String>,
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
85#[non_exhaustive]
86pub struct EdgarSearchHit {
87 #[serde(default)]
89 pub _index: Option<String>,
90
91 #[serde(default)]
93 pub _id: Option<String>,
94
95 #[serde(default)]
97 pub _score: Option<f64>,
98
99 #[serde(default)]
101 pub _source: Option<EdgarSearchSource>,
102}
103
104#[derive(Debug, Clone, Serialize, Deserialize)]
106#[cfg_attr(feature = "dataframe", derive(crate::ToDataFrame))]
107#[non_exhaustive]
108pub struct EdgarSearchSource {
109 #[serde(default)]
111 pub ciks: Vec<String>,
112
113 #[serde(default)]
115 pub file_date: Option<String>,
116
117 #[serde(default)]
119 pub form: Option<String>,
120
121 #[serde(default)]
123 pub adsh: Option<String>,
124
125 #[serde(default)]
127 pub display_names: Vec<String>,
128
129 #[serde(default)]
131 pub period_ending: Option<String>,
132
133 #[serde(default)]
135 pub root_forms: Vec<String>,
136
137 #[serde(default)]
139 pub sics: Vec<String>,
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// A single populated hit must produce a one-row frame that exposes the
    /// `form` and `file_date` columns.
    #[test]
    #[cfg(feature = "dataframe")]
    fn test_search_results_dataframe_conversion() {
        let results = EdgarSearchResults {
            query: Some(serde_json::json!({"query": {"match": {"doc_text": "test"}}})),
            hits: Some(EdgarSearchHitsContainer {
                total: Some(EdgarSearchTotal {
                    value: Some(1),
                    relation: Some("eq".to_string()),
                }),
                max_score: Some(1.5),
                hits: vec![EdgarSearchHit {
                    _index: Some("edgar-filings".to_string()),
                    _id: Some("1".to_string()),
                    _score: Some(1.5),
                    _source: Some(EdgarSearchSource {
                        ciks: vec!["320193".to_string()],
                        file_date: Some("2024-11-01".to_string()),
                        form: Some("10-K".to_string()),
                        adsh: Some("0000320193-24-000123".to_string()),
                        display_names: vec!["Apple Inc. (AAPL)".to_string()],
                        period_ending: Some("2024-09-28".to_string()),
                        root_forms: vec!["10-K".to_string()],
                        sics: vec!["3571".to_string()],
                    }),
                }],
            }),
        };

        let df = results.to_dataframe().unwrap();
        assert_eq!(df.height(), 1);
        let col_names = df.get_column_names_owned();
        assert!(col_names.iter().any(|n| n.as_str() == "form"));
        assert!(col_names.iter().any(|n| n.as_str() == "file_date"));
    }

    /// A fully populated payload round-trips into the typed model.
    #[test]
    fn test_deserialize_search_results() {
        let json = r#"{
            "query": {"query": {"match": {"doc_text": "test"}}},
            "hits": {
                "total": {
                    "value": 10000,
                    "relation": "gte"
                },
                "max_score": 1.5,
                "hits": [
                    {
                        "_index": "edgar-filings",
                        "_id": "1",
                        "_score": 1.5,
                        "_source": {
                            "ciks": ["320193"],
                            "file_date": "2024-11-01",
                            "form": "10-K",
                            "adsh": "0000320193-24-000123",
                            "display_names": ["Apple Inc. (AAPL)"],
                            "period_ending": "2024-09-28",
                            "root_forms": ["10-K"],
                            "sics": ["3571"]
                        }
                    }
                ]
            }
        }"#;

        let results: EdgarSearchResults = serde_json::from_str(json).unwrap();
        assert!(results.query.is_some());
        let hits_container = results.hits.as_ref().unwrap();
        assert_eq!(hits_container.total.as_ref().unwrap().value, Some(10000));
        assert_eq!(hits_container.hits.len(), 1);

        let first_hit = &hits_container.hits[0];
        let source = first_hit._source.as_ref().unwrap();
        assert_eq!(source.ciks, vec!["320193"]);
        assert_eq!(source.form.as_deref(), Some("10-K"));
        assert!(!source.display_names.is_empty());
    }

    /// Keys missing from the payload must fall back to `None` or an empty
    /// `Vec` rather than failing, since every field is `#[serde(default)]`.
    #[test]
    fn test_deserialize_missing_fields_use_defaults() {
        let empty: EdgarSearchResults = serde_json::from_str("{}").unwrap();
        assert!(empty.query.is_none());
        assert!(empty.hits.is_none());

        // One hit with no keys at all, one hit with an empty `_source`.
        let sparse: EdgarSearchResults =
            serde_json::from_str(r#"{"hits": {"hits": [{}, {"_source": {}}]}}"#).unwrap();
        let container = sparse.hits.as_ref().unwrap();
        assert!(container.total.is_none());
        assert!(container.max_score.is_none());
        assert_eq!(container.hits.len(), 2);
        assert!(container.hits[0]._id.is_none());
        assert!(container.hits[0]._source.is_none());

        let source = container.hits[1]._source.as_ref().unwrap();
        assert!(source.ciks.is_empty());
        assert!(source.form.is_none());
        assert!(source.display_names.is_empty());
        assert!(source.sics.is_empty());
    }
}