use crate::cik::CikEntry;
use crate::error::{Error, Result};
use crate::nport::{parse_nport, NportFiling};
use crate::types::IndexId;
use std::time::Duration;
use tokio::time::sleep;
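/// Default `User-Agent` sent with every SEC request. EDGAR's fair-access
/// policy expects a descriptive agent string that includes contact details.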
pub const SEC_USER_AGENT_DEFAULT: &str = "indexkit frederic.miesegaes@gmail.com";
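/// Resolves the `User-Agent` for SEC requests, preferring the
/// `INDEXKIT_SEC_USER_AGENT` environment variable over the built-in default.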
pub fn resolved_sec_user_agent() -> String {
std::env::var("INDEXKIT_SEC_USER_AGENT").unwrap_or_else(|_| SEC_USER_AGENT_DEFAULT.to_string())
}
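/// Pause between successive SEC requests. EDGAR asks automated clients to stay
/// under roughly ten requests per second; 120 ms keeps us comfortably below that.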
pub const INTER_REQUEST_DELAY: Duration = Duration::from_millis(120);
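/// Lightweight reference to a single EDGAR filing, as listed in the submissions
/// index or returned by full-text search. Dates are ISO `YYYY-MM-DD` strings.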
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilingRef {
pub accession: String,
pub filing_date: String,
pub report_date: Option<String>,
pub primary_document: String,
}
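/// Thin HTTP client for SEC EDGAR, wrapping a `reqwest::Client` configured with
/// the resolved `User-Agent` and a 60-second request timeout.
///
/// Intended usage, roughly (the CIK shown is the one used in the tests below):
///
/// ```ignore
/// let client = SecClient::new()?;
/// let filings = client.list_nport_filings("0001100663").await?;
/// for f in &filings {
///     println!("{} filed {}", f.accession, f.filing_date);
/// }
/// ```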
#[derive(Clone)]
pub struct SecClient {
http: reqwest::Client,
}
impl SecClient {
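    /// Builds a client using the resolved `User-Agent` and a 60-second timeout.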
pub fn new() -> Result<Self> {
let ua = resolved_sec_user_agent();
let http = reqwest::Client::builder()
.user_agent(ua)
.timeout(Duration::from_secs(60))
.build()?;
Ok(Self { http })
}
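    /// Lists every NPORT-P filing for `cik` (expected in the zero-padded,
    /// 10-digit form used by the `data.sec.gov/submissions/` endpoint), walking
    /// the recent submissions plus any older archive pages. Newest first.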
pub async fn list_nport_filings(&self, cik: &str) -> Result<Vec<FilingRef>> {
let mut out: Vec<FilingRef> = Vec::new();
let url = format!("https://data.sec.gov/submissions/CIK{cik}.json");
let v: serde_json::Value = self.http_get_json(&url).await?;
append_from_submissions(&v, &mut out, true);
        let older_files = parse_older_archives(&v);
for fname in older_files {
sleep(INTER_REQUEST_DELAY).await;
let url = format!("https://data.sec.gov/submissions/{fname}");
match self.http_get_json(&url).await {
Ok(vv) => append_from_submissions(&vv, &mut out, false),
Err(e) => tracing::warn!("archive {fname} fetch failed: {e}"),
}
}
out.sort_by(|a, b| b.filing_date.cmp(&a.filing_date));
Ok(out)
}
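    /// Downloads and parses the `primary_doc.xml` of a single filing, identified
    /// by CIK and accession number.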
pub async fn fetch_nport(&self, cik: &str, accession: &str) -> Result<NportFiling> {
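        // Archive paths use the numeric CIK without leading zeros.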
let cik_num = cik
.trim_start_matches('0')
.parse::<u64>()
.map_err(|_| Error::Nport(format!("bad CIK {cik}")))?;
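        // The filing's directory name is its accession number with dashes removed.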
        let no_dash = accession.replace('-', "");
let url =
format!("https://www.sec.gov/Archives/edgar/data/{cik_num}/{no_dash}/primary_doc.xml");
let resp = self.http.get(&url).send().await?;
if !resp.status().is_success() {
return Err(Error::Nport(format!(
"fetch {url}: HTTP {} {}",
resp.status().as_u16(),
resp.status().canonical_reason().unwrap_or("")
)));
}
let body = resp.bytes().await?;
parse_nport(&body)
}
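    /// Shortlists candidate filings for `entry` (full-text search when a series
    /// id is known, otherwise every NPORT-P filing for the trust CIK), then
    /// fetches and parses each candidate, keeping only those whose header
    /// series id matches.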
pub async fn filings_for_series(
&self,
entry: &CikEntry,
) -> Result<Vec<(FilingRef, NportFiling)>> {
let candidates: Vec<FilingRef> = match &entry.series_id {
Some(sid) => self.search_filings_by_series(&entry.trust_cik, sid).await?,
None => self.list_nport_filings(&entry.trust_cik).await?,
};
tracing::info!(
cik = %entry.trust_cik,
series = ?entry.series_id,
candidates = candidates.len(),
"series candidate shortlist"
);
let mut out = Vec::new();
for f in &candidates {
sleep(INTER_REQUEST_DELAY).await;
match self.fetch_nport(&entry.trust_cik, &f.accession).await {
Ok(nport) => {
let matches = match &entry.series_id {
Some(sid) => nport.header.series_id.as_deref() == Some(sid.as_str()),
None => true,
};
if matches {
out.push((f.clone(), nport));
}
}
Err(e) => tracing::warn!(
accession = %f.accession,
"skip: fetch/parse failed: {e}"
),
}
}
Ok(out)
}
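    /// Ignores the supplied `_filings` shortlist and recomputes the
    /// series-filtered set via `filings_for_series`.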
pub async fn filter_to_series(
&self,
entry: &CikEntry,
_filings: &[FilingRef],
) -> Result<Vec<(FilingRef, NportFiling)>> {
self.filings_for_series(entry).await
}
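    /// Queries EDGAR full-text search for NPORT-P filings mentioning `series_id`,
    /// restricted to `cik`, paging through results with the `from` offset and
    /// de-duplicating by accession number. Newest first.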
pub async fn search_filings_by_series(
&self,
cik: &str,
series_id: &str,
) -> Result<Vec<FilingRef>> {
let mut out: Vec<FilingRef> = Vec::new();
let mut seen = std::collections::BTreeSet::<String>::new();
let mut from: u32 = 0;
loop {
sleep(INTER_REQUEST_DELAY).await;
let url = format!(
"https://efts.sec.gov/LATEST/search-index?q=%22{}%22&forms=NPORT-P&ciks={}&from={}",
series_id, cik, from
);
let v: serde_json::Value = self.http_get_json(&url).await?;
let hits = v.pointer("/hits/hits").and_then(|x| x.as_array());
let Some(hits) = hits else { break };
if hits.is_empty() {
break;
}
let mut added = 0;
for h in hits {
let Some(src) = h.get("_source") else {
continue;
};
let Some(accession) = src.get("adsh").and_then(|x| x.as_str()) else {
continue;
};
if !seen.insert(accession.to_string()) {
continue;
}
let filing_date = src
.get("file_date")
.and_then(|x| x.as_str())
.unwrap_or("")
.to_string();
let report_date = src
.get("period_ending")
.and_then(|x| x.as_str())
.map(str::to_string);
out.push(FilingRef {
accession: accession.to_string(),
filing_date,
report_date,
primary_document: "primary_doc.xml".into(),
});
added += 1;
}
let total = v
.pointer("/hits/total/value")
.and_then(|x| x.as_u64())
.unwrap_or(0);
from += hits.len() as u32;
if added == 0 || from >= total as u32 {
break;
}
}
out.sort_by(|a, b| b.filing_date.cmp(&a.filing_date));
Ok(out)
}
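    /// Convenience wrapper: resolves the `CikEntry` for `index` and delegates to
    /// `filings_for_series`.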
pub async fn filings_for_index(&self, index: IndexId) -> Result<Vec<(FilingRef, NportFiling)>> {
let entry = crate::cik::entry_for(index);
self.filings_for_series(&entry).await
}
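    /// GETs `url` and deserializes the body as JSON, mapping non-2xx statuses to
    /// `Error::Nport`.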
async fn http_get_json(&self, url: &str) -> Result<serde_json::Value> {
let resp = self.http.get(url).send().await?;
if !resp.status().is_success() {
return Err(Error::Nport(format!(
"SEC fetch {url}: HTTP {} {}",
resp.status().as_u16(),
resp.status().canonical_reason().unwrap_or("")
)));
}
let body = resp.bytes().await?;
Ok(serde_json::from_slice(&body)?)
}
}
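/// Extracts NPORT-P rows from a submissions JSON document. The feed is
/// column-oriented (one parallel array per field); `use_recent_key` selects
/// `/filings/recent` for the main document, while older archive files carry the
/// arrays at the top level.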
fn append_from_submissions(v: &serde_json::Value, out: &mut Vec<FilingRef>, use_recent_key: bool) {
let base = if use_recent_key {
v.pointer("/filings/recent")
} else {
Some(v)
};
let Some(r) = base else {
return;
};
let forms = r
.get("form")
.and_then(|x| x.as_array())
.cloned()
.unwrap_or_default();
let dates = r
.get("filingDate")
.and_then(|x| x.as_array())
.cloned()
.unwrap_or_default();
let accs = r
.get("accessionNumber")
.and_then(|x| x.as_array())
.cloned()
.unwrap_or_default();
let reports = r
.get("reportDate")
.and_then(|x| x.as_array())
.cloned()
.unwrap_or_default();
let docs = r
.get("primaryDocument")
.and_then(|x| x.as_array())
.cloned()
.unwrap_or_default();
for i in 0..forms.len() {
let Some(form) = forms.get(i).and_then(|x| x.as_str()) else {
continue;
};
if form != "NPORT-P" {
continue;
}
let accession = accs
.get(i)
.and_then(|x| x.as_str())
.unwrap_or("")
.to_string();
let filing_date = dates
.get(i)
.and_then(|x| x.as_str())
.unwrap_or("")
.to_string();
let report_date = reports.get(i).and_then(|x| x.as_str()).map(str::to_string);
let primary_document = docs
.get(i)
.and_then(|x| x.as_str())
.unwrap_or("primary_doc.xml")
.to_string();
if accession.is_empty() {
continue;
}
out.push(FilingRef {
accession,
filing_date,
report_date,
primary_document,
});
}
}
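/// Returns the file names of the older submissions archives listed under
/// `/filings/files`.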
fn parse_older_archives(v: &serde_json::Value) -> Vec<String> {
v.pointer("/filings/files")
.and_then(|x| x.as_array())
.map(|arr| {
arr.iter()
.filter_map(|x| x.get("name").and_then(|n| n.as_str()).map(str::to_string))
.collect()
})
.unwrap_or_default()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn append_from_recent() {
let v: serde_json::Value = serde_json::from_str(
r#"{
"filings": {
"recent": {
"form": ["NPORT-P", "10-K", "NPORT-P"],
"filingDate": ["2024-06-01", "2024-05-01", "2024-07-01"],
"accessionNumber": ["0001-24-001", "0002-24-002", "0001-24-003"],
"reportDate": ["2024-04-30", "2023-12-31", "2024-05-31"],
"primaryDocument": ["primary_doc.xml", "10k.htm", "primary_doc.xml"]
}
}
}"#,
)
.unwrap();
let mut out = Vec::new();
append_from_submissions(&v, &mut out, true);
assert_eq!(out.len(), 2);
assert_eq!(out[0].filing_date, "2024-06-01");
assert_eq!(out[1].filing_date, "2024-07-01");
}
#[test]
fn parse_older_archives_extracts_names() {
let v: serde_json::Value = serde_json::from_str(
r#"{
"filings": {
"files": [
{"name":"CIK0001100663-submissions-001.json","filingCount":999,"filingFrom":"2021-01-01","filingTo":"2021-12-31"},
{"name":"CIK0001100663-submissions-002.json","filingCount":999,"filingFrom":"2020-01-01","filingTo":"2020-12-31"}
]
}
}"#,
)
.unwrap();
let names = parse_older_archives(&v);
assert_eq!(names.len(), 2);
assert_eq!(names[0], "CIK0001100663-submissions-001.json");
}
}