use super::Edgar;
use super::error::{EdgarError, Result};
use super::options::FilingOptions;
use super::traits::IndexOperations;
use crate::parsing::index::{IndexConfig, IndexEntry, IndexParser, IndexType};
use async_trait::async_trait;
use chrono::{Datelike, NaiveDateTime};
use flate2::read::GzDecoder;
use serde::{Deserialize, Serialize};
use serde_json;
use std::io::Read;
/// Top-level JSON document of an `index.json` directory listing.
///
/// `Edgar::fetch_index` deserializes the response body into this type; the
/// payload wraps a single `directory` object.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct IndexResponse {
/// The directory listing carried by the response.
pub directory: Directory,
}
/// A directory listing: the entries it contains plus its own name and parent.
///
/// Field names are mapped to kebab-case on the wire, so `parent_dir` travels
/// under the JSON key `parent-dir`; `item` and `name` keep their spelling.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Directory {
    /// Entries contained in this directory.
    pub item: Vec<DirectoryItem>,
    /// Name of this directory.
    pub name: String,
    /// Parent directory of this listing (JSON key `parent-dir`).
    pub parent_dir: String,
}
/// Kind of entry found in a directory listing.
///
/// Variants are (de)serialized under their lowercase names (`"dir"`, `"file"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ItemType {
/// A directory entry.
Dir,
/// A file entry; the only kind `Edgar::find_index_file` will return.
File,
}
/// One entry of a directory listing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryItem {
    /// Last-modification timestamp (JSON key `last-modified`), exchanged as a
    /// string in the layout handled by `edgar_date_format`.
    #[serde(rename = "last-modified", with = "edgar_date_format")]
    pub last_modified: NaiveDateTime,
    /// Entry name; compared against the expected index file name during lookup.
    pub name: String,
    /// Whether the entry is a directory or a file (JSON key `type`).
    #[serde(rename = "type")]
    pub type_: ItemType,
    /// Path component appended to the directory URL when downloading the entry.
    pub href: String,
    /// Size exactly as reported by the listing, kept as a string.
    pub size: String,
}
/// Serde adapter for the timestamp layout used by `DirectoryItem::last_modified`.
///
/// Meant to be plugged in through `#[serde(with = "edgar_date_format")]`.
mod edgar_date_format {
    use chrono::NaiveDateTime;
    use serde::{self, Deserialize, Deserializer, Serializer};

    /// `strftime` layout: month/day/year, then a 12-hour clock with AM/PM marker.
    const FORMAT: &str = "%m/%d/%Y %I:%M:%S %p";

    /// Reads a string from `deserializer` and parses it with `FORMAT`.
    ///
    /// # Errors
    /// Fails when the input is not a string or does not match `FORMAT`; parse
    /// failures are reported through `serde::de::Error::custom`.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<NaiveDateTime, D::Error>
    where
        D: Deserializer<'de>,
    {
        let raw = String::deserialize(deserializer)?;
        match NaiveDateTime::parse_from_str(raw.as_str(), FORMAT) {
            Ok(timestamp) => Ok(timestamp),
            Err(parse_error) => Err(serde::de::Error::custom(parse_error)),
        }
    }

    /// Renders `date` with `FORMAT` and emits it as a string.
    pub fn serialize<S>(date: &NaiveDateTime, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let rendered = date.format(FORMAT).to_string();
        serializer.serialize_str(rendered.as_str())
    }
}
/// Calendar quarter of a year.
///
/// Each variant's discriminant equals its quarter number (1 through 4).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Quarter {
/// First quarter: months 1-3.
Q1 = 1,
/// Second quarter: months 4-6.
Q2 = 2,
/// Third quarter: months 7-9.
Q3 = 3,
/// Fourth quarter: months 10-12.
Q4 = 4,
}
impl Quarter {
    /// Maps a calendar month (1-12) to the quarter that contains it.
    ///
    /// # Errors
    /// Returns `EdgarError::InvalidMonth` when `month` is outside `1..=12`.
    pub fn from_month(month: u32) -> Result<Self> {
        if !(1..=12).contains(&month) {
            return Err(EdgarError::InvalidMonth);
        }
        // Months 1-3 land in bucket 0, 4-6 in bucket 1, and so on.
        let quarter = match (month - 1) / 3 {
            0 => Self::Q1,
            1 => Self::Q2,
            2 => Self::Q3,
            _ => Self::Q4,
        };
        Ok(quarter)
    }

    /// Returns the quarter number (1 through 4).
    pub fn as_i32(&self) -> i32 {
        match self {
            Self::Q1 => 1,
            Self::Q2 => 2,
            Self::Q3 => 3,
            Self::Q4 => 4,
        }
    }
}
/// A single calendar day used to address a daily index file.
///
/// Fields are private; values are built through `EdgarDay::new`, which
/// validates the components.
#[derive(Debug, Clone, Copy)]
pub struct EdgarDay {
/// Calendar year; `EdgarDay::new` rejects values below 1994.
year: i32,
/// Month of the year, 1-12.
month: u32,
/// Day of the month.
day: u32,
}
impl EdgarDay {
    /// Creates a validated calendar day.
    ///
    /// # Errors
    /// * `EdgarError::InvalidYear` when `year` is earlier than 1994.
    /// * `EdgarError::InvalidMonth` when `month` is outside `1..=12`.
    /// * `EdgarError::InvalidDay` when `day` does not exist in that month of
    ///   that year (for example 31 April, or 29 February outside a leap year).
    pub fn new(year: i32, month: u32, day: u32) -> Result<Self> {
        if year < 1994 {
            return Err(EdgarError::InvalidYear);
        }
        if !(1..=12).contains(&month) {
            return Err(EdgarError::InvalidMonth);
        }
        // A fixed 1..=31 bound would accept impossible dates such as
        // 30 February, so the upper bound comes from the real month length.
        if !(1..=Self::days_in_month(year, month)).contains(&day) {
            return Err(EdgarError::InvalidDay);
        }
        Ok(Self { year, month, day })
    }

    /// Number of days in `month` of `year` under the Gregorian leap-year rule.
    ///
    /// `month` is expected to be within `1..=12`; any other value falls into
    /// the 31-day arm.
    fn days_in_month(year: i32, month: u32) -> u32 {
        let is_leap_year = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
        match month {
            4 | 6 | 9 | 11 => 30,
            2 if is_leap_year => 29,
            2 => 28,
            _ => 31,
        }
    }

    /// Formats the day as `YYYYMMDD` with zero padding.
    pub fn format_date(&self) -> String {
        format!("{:04}{:02}{:02}", self.year, self.month, self.day)
    }

    /// Returns the quarter that contains this day's month.
    pub fn quarter(&self) -> Quarter {
        Quarter::from_month(self.month)
            .expect("EdgarDay::new guarantees the month is within 1..=12")
    }

    /// Returns the calendar year.
    pub fn year(&self) -> i32 {
        self.year
    }
}
/// A year/quarter pair identifying one quarterly index directory.
///
/// Fields are private; values are built through `EdgarPeriod::new`.
#[derive(Debug, Clone, Copy)]
pub struct EdgarPeriod {
/// Calendar year; `EdgarPeriod::new` rejects values below 1994.
year: i32,
/// Quarter within `year`.
quarter: Quarter,
}
/// Selector telling `Edgar::find_index_file` which index file name to look for.
#[derive(Debug, Clone, Copy)]
pub enum EdgarDate {
/// A specific day; matches files named `<prefix>.<YYYYMMDD>.<ext>`.
Day(EdgarDay),
/// A quarterly period; matches files named `<prefix>.<ext>`.
///
/// The variant carries no data, so the `EdgarPeriod` it was converted from
/// is not retained.
Period(),
}
impl From<EdgarDay> for EdgarDate {
fn from(day: EdgarDay) -> Self {
EdgarDate::Day(day)
}
}
impl From<EdgarPeriod> for EdgarDate {
fn from(_period: EdgarPeriod) -> Self {
EdgarDate::Period()
}
}
impl EdgarPeriod {
    /// Creates a period for `quarter` of `year`.
    ///
    /// # Errors
    /// Returns `EdgarError::InvalidYear` when `year` is earlier than 1994.
    pub fn new(year: i32, quarter: Quarter) -> Result<Self> {
        if year >= 1994 {
            Ok(Self { year, quarter })
        } else {
            Err(EdgarError::InvalidYear)
        }
    }

    /// Returns the calendar year of the period.
    pub fn year(&self) -> i32 {
        let Self { year, .. } = *self;
        year
    }

    /// Returns the quarter of the period.
    pub fn quarter(&self) -> Quarter {
        let Self { quarter, .. } = *self;
        quarter
    }
}
impl Edgar {
fn is_archive(filename: &str) -> bool {
filename.ends_with(".gz")
}
async fn extract_archive(&self, content: Vec<u8>, filename: &str) -> Result<String> {
if filename.ends_with(".gz") {
let mut decoder = GzDecoder::new(&content[..]);
let mut result = String::new();
decoder.read_to_string(&mut result)?;
Ok(result)
} else {
Ok(String::from_utf8(content)?)
}
}
async fn download_file(&self, url: &str, is_archive: bool) -> Result<String> {
if is_archive {
let bytes = self.get_bytes(url).await?;
self.extract_archive(bytes, url).await
} else {
self.get(url).await
}
}
async fn download_and_parse_index(
&self,
url: &str,
file_name: &str,
index_type: Option<IndexType>,
) -> Result<Vec<IndexEntry>> {
let is_archive = Self::is_archive(file_name);
let content = self.download_file(url, is_archive).await?;
let config = IndexConfig {
index_type,
..Default::default()
};
let parser = IndexParser::new(config);
Ok(parser.parse(content.as_bytes())?)
}
fn find_index_file<'a>(
items: &'a [DirectoryItem],
date: impl Into<EdgarDate>,
index_type: IndexType,
) -> Option<&'a DirectoryItem> {
let prefix = index_type.as_str();
let extensions = ["gz", "idx"];
match date.into() {
EdgarDate::Day(day) => {
let date_str = day.format_date();
for ext in extensions {
let pattern = format!("{}.{}.{}", prefix, date_str, ext);
if let Some(item) = items
.iter()
.find(|i| i.name == pattern && i.type_ == ItemType::File)
{
return Some(item);
}
}
}
EdgarDate::Period() => {
for ext in extensions {
let pattern = format!("{}.{}", prefix, ext);
if let Some(item) = items
.iter()
.find(|i| i.name == pattern && i.type_ == ItemType::File)
{
return Some(item);
}
}
}
}
None
}
fn build_index_url(
&self,
index_type: &str,
year: Option<i32>,
quarter: Option<i32>,
) -> Result<String> {
let url = match (year, quarter) {
(None, None) => {
format!(
"{}/{}-index/index.json",
self.edgar_archives_url, index_type
)
}
(Some(y), None) => {
format!(
"{}/{}-index/{}/index.json",
self.edgar_archives_url, index_type, y
)
}
(None, Some(q)) => {
let current_year = chrono::Local::now().year();
format!(
"{}/{}-index/{}/QTR{}/index.json",
self.edgar_archives_url, index_type, current_year, q
)
}
(Some(y), Some(q)) => {
format!(
"{}/{}-index/{}/QTR{}/index.json",
self.edgar_archives_url, index_type, y, q
)
}
};
Ok(url)
}
async fn fetch_index(
&self,
index_type: &str,
year: Option<i32>,
quarter: Option<i32>,
) -> Result<IndexResponse> {
match (year, quarter) {
(Some(y), _) if y < 1994 => Err(EdgarError::InvalidYear),
(_, Some(q)) if q < 1 || q > 4 => Err(EdgarError::InvalidQuarter),
_ => {
let url = self.build_index_url(index_type, year, quarter)?;
let response = self.get(&url).await?;
Ok(serde_json::from_str(&response)?)
}
}
}
fn apply_filters(&self, mut entries: Vec<IndexEntry>, opts: &FilingOptions) -> Vec<IndexEntry> {
if let Some(ref form_types) = opts.form_types {
entries.retain(|entry| form_types.iter().any(|ft| ft == &entry.form_type.trim()));
}
if let Some(ref ciks) = opts.ciks {
entries.retain(|entry| ciks.contains(&entry.cik));
}
if let Some(offset) = opts.offset {
entries = entries.into_iter().skip(offset).collect();
}
if let Some(limit) = opts.limit {
entries.truncate(limit);
}
entries
}
}
#[async_trait]
impl IndexOperations for Edgar {
    /// Returns the filings listed in the daily index for `day`.
    ///
    /// Fetches the `daily` listing for the day's year and quarter, locates
    /// the index file for that day (default index type), downloads and parses
    /// it, then applies `options` when provided.
    ///
    /// # Errors
    /// `EdgarError::NotFound` when the listing has no matching index file,
    /// plus any error from fetching, decoding or parsing.
    async fn get_daily_filings(
        &self,
        day: EdgarDay,
        options: Option<FilingOptions>,
    ) -> Result<Vec<IndexEntry>> {
        let index_type = IndexType::default();
        let (year, quarter) = (day.year(), day.quarter().as_i32());

        let listing = self
            .fetch_index("daily", Some(year), Some(quarter))
            .await?;
        let index_file = match Self::find_index_file(&listing.directory.item, day, index_type) {
            Some(found) => found,
            None => return Err(EdgarError::NotFound),
        };

        let url = format!(
            "{}/daily-index/{}/QTR{}/{}",
            self.edgar_archives_url, year, quarter, index_file.href
        );
        let entries = self
            .download_and_parse_index(&url, &index_file.name, Some(index_type))
            .await?;

        Ok(match options {
            Some(opts) => self.apply_filters(entries, &opts),
            None => entries,
        })
    }

    /// Returns the filings listed in the full index for `period`.
    ///
    /// Same flow as `get_daily_filings`, but against the `full` listing and
    /// the period-level index file.
    ///
    /// # Errors
    /// `EdgarError::NotFound` when the listing has no matching index file,
    /// plus any error from fetching, decoding or parsing.
    async fn get_period_filings(
        &self,
        period: EdgarPeriod,
        options: Option<FilingOptions>,
    ) -> Result<Vec<IndexEntry>> {
        let index_type = IndexType::default();
        let (year, quarter) = (period.year(), period.quarter().as_i32());

        let listing = self
            .fetch_index("full", Some(year), Some(quarter))
            .await?;
        let index_file = match Self::find_index_file(&listing.directory.item, period, index_type) {
            Some(found) => found,
            None => return Err(EdgarError::NotFound),
        };

        let url = format!(
            "{}/full-index/{}/QTR{}/{}",
            self.edgar_archives_url, year, quarter, index_file.href
        );
        let entries = self
            .download_and_parse_index(&url, &index_file.name, Some(index_type))
            .await?;

        Ok(match options {
            Some(opts) => self.apply_filters(entries, &opts),
            None => entries,
        })
    }

    /// Fetches the `daily` directory listing: the one for `period` when
    /// given, otherwise the root listing.
    async fn daily_index(&self, period: Option<EdgarPeriod>) -> Result<IndexResponse> {
        let year = period.map(|p| p.year());
        let quarter = period.map(|p| p.quarter().as_i32());
        self.fetch_index("daily", year, quarter).await
    }

    /// Fetches the `full` directory listing: the one for `period` when
    /// given, otherwise the root listing.
    async fn full_index(&self, period: Option<EdgarPeriod>) -> Result<IndexResponse> {
        let year = period.map(|p| p.year());
        let quarter = period.map(|p| p.quarter().as_i32());
        self.fetch_index("full", year, quarter).await
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp layout shared by every fixture entry.
    const STAMP_FORMAT: &str = "%m/%d/%Y %I:%M:%S %p";

    /// Builds a file entry whose `href` equals its `name`.
    fn file_entry(name: &str, size: &str) -> DirectoryItem {
        DirectoryItem {
            last_modified: NaiveDateTime::parse_from_str("08/15/2023 12:00:00 AM", STAMP_FORMAT)
                .unwrap(),
            name: name.to_string(),
            type_: ItemType::File,
            href: name.to_string(),
            size: size.to_string(),
        }
    }

    /// A day selects the dated file, a period selects the undated one.
    #[test]
    fn test_find_index_file() {
        let items = vec![
            file_entry("company.20230815.idx", "1000"),
            file_entry("company.idx", "2000"),
        ];

        let day = EdgarDay::new(2023, 8, 15).unwrap();
        let daily = Edgar::find_index_file(&items, day, IndexType::Company).unwrap();
        assert_eq!(daily.name, "company.20230815.idx");
        assert_eq!(daily.href, "company.20230815.idx");
        assert_eq!(daily.size, "1000");
        assert_eq!(daily.type_, ItemType::File);

        let period = EdgarPeriod::new(2023, Quarter::Q3).unwrap();
        let quarterly = Edgar::find_index_file(&items, period, IndexType::Company).unwrap();
        assert_eq!(quarterly.name, "company.idx");
        assert_eq!(quarterly.href, "company.idx");
        assert_eq!(quarterly.size, "2000");
        assert_eq!(quarterly.type_, ItemType::File);
    }

    // NOTE(review): this checks the same constructor as
    // `test_period_invalid_year`; it does not call `daily_index` itself.
    #[test]
    fn test_daily_index_invalid_year() {
        let outcome = EdgarPeriod::new(1993, Quarter::Q1);
        assert!(matches!(outcome, Err(EdgarError::InvalidYear)));
    }

    /// Periods before 1994 cannot be constructed.
    #[test]
    fn test_period_invalid_year() {
        let outcome = EdgarPeriod::new(1993, Quarter::Q1);
        assert!(matches!(outcome, Err(EdgarError::InvalidYear)));
    }
}