use serde::{Deserialize, Serialize};
/// Top-level payload of a search response.
///
/// Both fields are optional and fall back to `None` when they are absent
/// from the input, so a partial payload still deserializes.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
pub struct EdgarSearchResults {
    /// The `query` object of the payload, kept as untyped JSON.
    #[serde(default)]
    pub query: Option<serde_json::Value>,

    /// Container with the result totals and the individual hits.
    #[serde(default)]
    pub hits: Option<EdgarSearchHitsContainer>,
}
#[cfg(feature = "dataframe")]
impl EdgarSearchResults {
    /// Builds a DataFrame from the `_source` record of every hit.
    ///
    /// Hits that carry no `_source` are skipped. When `hits` is `None`, an
    /// empty list of sources is handed to the conversion.
    ///
    /// # Errors
    ///
    /// Returns whatever error `EdgarSearchSource::vec_to_dataframe` reports.
    pub fn to_dataframe(&self) -> ::polars::prelude::PolarsResult<::polars::prelude::DataFrame> {
        let mut sources: Vec<EdgarSearchSource> = Vec::new();

        if let Some(container) = &self.hits {
            // The conversion needs owned records, so each present source is cloned.
            sources.extend(container.hits.iter().filter_map(|hit| hit._source.clone()));
        }

        EdgarSearchSource::vec_to_dataframe(&sources)
    }
}
/// The `hits` object of a search response: totals, best score and the hit list.
///
/// Every field is defaulted during deserialization, so missing keys yield
/// `None` or an empty list rather than an error.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
pub struct EdgarSearchHitsContainer {
    /// Total-count information for the search, if provided.
    #[serde(default)]
    pub total: Option<EdgarSearchTotal>,

    /// The `max_score` value of the payload, if provided.
    #[serde(default)]
    pub max_score: Option<f64>,

    /// Individual hits; empty when the key is missing.
    #[serde(default)]
    pub hits: Vec<EdgarSearchHit>,
}
/// The `total` object of a hit container.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
pub struct EdgarSearchTotal {
    /// The reported count, if provided.
    #[serde(default)]
    pub value: Option<u64>,

    /// Qualifier for `value`; the tests in this file use `"eq"` and `"gte"`.
    // NOTE(review): presumably "gte" marks `value` as a lower bound — confirm
    // against the upstream API documentation.
    #[serde(default)]
    pub relation: Option<String>,
}
/// One entry of the hit list.
///
/// Field names keep their leading underscore so that they match the JSON
/// keys directly, without any serde rename. Every field is optional.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
pub struct EdgarSearchHit {
    /// Value of the `_index` key, if provided.
    #[serde(default)]
    pub _index: Option<String>,

    /// Value of the `_id` key, if provided.
    #[serde(default)]
    pub _id: Option<String>,

    /// Value of the `_score` key, if provided.
    #[serde(default)]
    pub _score: Option<f64>,

    /// The record attached to this hit, if provided.
    #[serde(default)]
    pub _source: Option<EdgarSearchSource>,
}
/// The `_source` record of a search hit.
///
/// List fields default to an empty vector and scalar fields to `None` when
/// the corresponding key is missing. With the `dataframe` feature enabled the
/// type additionally derives `crate::ToDataFrame`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(feature = "dataframe", derive(crate::ToDataFrame))]
#[non_exhaustive]
pub struct EdgarSearchSource {
    /// Values of the `ciks` key (presumably SEC CIK identifiers — confirm).
    #[serde(default)]
    pub ciks: Vec<String>,

    /// Value of the `file_date` key; the test fixtures use `YYYY-MM-DD` text.
    #[serde(default)]
    pub file_date: Option<String>,

    /// Value of the `form` key, e.g. `"10-K"` in the test fixtures.
    #[serde(default)]
    pub form: Option<String>,

    /// Value of the `adsh` key (presumably the accession number — confirm).
    #[serde(default)]
    pub adsh: Option<String>,

    /// Values of the `display_names` key.
    #[serde(default)]
    pub display_names: Vec<String>,

    /// Value of the `period_ending` key; the test fixtures use `YYYY-MM-DD` text.
    #[serde(default)]
    pub period_ending: Option<String>,

    /// Values of the `root_forms` key.
    #[serde(default)]
    pub root_forms: Vec<String>,

    /// Values of the `sics` key (presumably SIC industry codes — confirm).
    #[serde(default)]
    pub sics: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single hit with a populated `_source` must turn into one DataFrame
    /// row that exposes the `form` and `file_date` columns.
    #[test]
    #[cfg(feature = "dataframe")]
    fn test_search_results_dataframe_conversion() {
        // Build the fixture bottom-up: source -> hit -> container -> results.
        let source = EdgarSearchSource {
            ciks: vec![String::from("320193")],
            file_date: Some(String::from("2024-11-01")),
            form: Some(String::from("10-K")),
            adsh: Some(String::from("0000320193-24-000123")),
            display_names: vec![String::from("Apple Inc. (AAPL)")],
            period_ending: Some(String::from("2024-09-28")),
            root_forms: vec![String::from("10-K")],
            sics: vec![String::from("3571")],
        };
        let hit = EdgarSearchHit {
            _index: Some(String::from("edgar-filings")),
            _id: Some(String::from("1")),
            _score: Some(1.5),
            _source: Some(source),
        };
        let container = EdgarSearchHitsContainer {
            total: Some(EdgarSearchTotal {
                value: Some(1),
                relation: Some(String::from("eq")),
            }),
            max_score: Some(1.5),
            hits: vec![hit],
        };
        let results = EdgarSearchResults {
            query: Some(serde_json::json!({"query": {"match": {"doc_text": "test"}}})),
            hits: Some(container),
        };

        let frame = results.to_dataframe().unwrap();
        assert_eq!(frame.height(), 1);

        let columns = frame.get_column_names_owned();
        for expected in ["form", "file_date"] {
            assert!(columns.iter().any(|name| name.as_str() == expected));
        }
    }

    /// A complete JSON payload must deserialize into the typed model with the
    /// totals, the hit list and the nested source record all populated.
    #[test]
    fn test_deserialize_search_results() {
        // The literal is intentionally flush-left: any leading whitespace
        // would become part of the string.
        let json = r#"{
"query": {"query": {"match": {"doc_text": "test"}}},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": 1.5,
"hits": [
{
"_index": "edgar-filings",
"_id": "1",
"_score": 1.5,
"_source": {
"ciks": ["320193"],
"file_date": "2024-11-01",
"form": "10-K",
"adsh": "0000320193-24-000123",
"display_names": ["Apple Inc. (AAPL)"],
"period_ending": "2024-09-28",
"root_forms": ["10-K"],
"sics": ["3571"]
}
}
]
}
}"#;

        let parsed: EdgarSearchResults = serde_json::from_str(json).unwrap();
        assert!(parsed.query.is_some());

        let container = parsed.hits.as_ref().unwrap();
        let total = container.total.as_ref().unwrap();
        assert_eq!(total.value, Some(10000));
        assert_eq!(container.hits.len(), 1);

        let source = container.hits[0]._source.as_ref().unwrap();
        assert_eq!(source.ciks, vec!["320193"]);
        assert_eq!(source.form.as_deref(), Some("10-K"));
        assert!(!source.display_names.is_empty());
    }
}