use crate::date::YearMonth;
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
/// Identifies where a constituent row or a daily snapshot was obtained from.
///
/// Every variant maps to a string tag (`DataSource::tag`, parsed back by
/// `DataSource::from_tag`) and to a numeric priority (`DataSource::priority`).
/// For serde, variant names are rewritten to snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DataSource {
/// Tag `ishares_cdn`; priority 5.
IsharesCdn,
/// Tag `invesco_cdn`; priority 5.
InvescoCdn,
/// Tag `spdr_cdn`; priority 5.
SpdrCdn,
/// Tag `wayback_<payload>`; priority 2. The payload is stored and emitted
/// verbatim with no validation. `tag` names it `yyyymmdd`, so it is
/// presumably a capture date stamp — confirm against the producer.
Wayback(String),
/// Tag `sec_nport`; priority 1.
SecNport,
/// Tag `github_fja05680`; priority 4.
GithubFja05680,
/// Tag `github_yfiua_<month>`; priority 3.
GithubYfiua {
/// Month this source entry refers to; rendered with its `Display` impl
/// in the tag and recovered with its `FromStr` impl.
month: YearMonth,
},
/// Tag `github_hanshof`; priority 3.
GithubHanshof,
}
impl DataSource {
    /// Tag prefix for [`DataSource::Wayback`]; the payload follows verbatim.
    const WAYBACK_PREFIX: &'static str = "wayback_";
    /// Tag prefix for [`DataSource::GithubYfiua`]; the formatted month follows.
    const GITHUB_YFIUA_PREFIX: &'static str = "github_yfiua_";

    /// Returns the string tag for this source.
    ///
    /// Payload-free variants map to a fixed literal. `Wayback` appends its
    /// payload to `wayback_`, and `GithubYfiua` appends the `Display` form of
    /// its month to `github_yfiua_`. [`DataSource::from_tag`] is the inverse.
    pub fn tag(&self) -> String {
        match self {
            DataSource::IsharesCdn => "ishares_cdn".into(),
            DataSource::InvescoCdn => "invesco_cdn".into(),
            DataSource::SpdrCdn => "spdr_cdn".into(),
            DataSource::Wayback(yyyymmdd) => format!("{}{yyyymmdd}", Self::WAYBACK_PREFIX),
            DataSource::SecNport => "sec_nport".into(),
            DataSource::GithubFja05680 => "github_fja05680".into(),
            DataSource::GithubYfiua { month } => {
                format!("{}{month}", Self::GITHUB_YFIUA_PREFIX)
            }
            DataSource::GithubHanshof => "github_hanshof".into(),
        }
    }

    /// Parses a tag produced by [`DataSource::tag`].
    ///
    /// Returns `None` for an unknown tag, or for a `github_yfiua_` tag whose
    /// remainder does not parse as a `YearMonth`. Matching is case-sensitive.
    /// The `wayback_` payload is accepted verbatim, including an empty one.
    pub fn from_tag(s: &str) -> Option<Self> {
        match s {
            "ishares_cdn" => Some(DataSource::IsharesCdn),
            "invesco_cdn" => Some(DataSource::InvescoCdn),
            "spdr_cdn" => Some(DataSource::SpdrCdn),
            "sec_nport" => Some(DataSource::SecNport),
            "github_fja05680" => Some(DataSource::GithubFja05680),
            "github_hanshof" => Some(DataSource::GithubHanshof),
            other => {
                // `strip_prefix` both tests for and removes the prefix, so no
                // hand-counted byte offsets are needed.
                if let Some(stamp) = other.strip_prefix(Self::WAYBACK_PREFIX) {
                    return Some(DataSource::Wayback(stamp.to_string()));
                }
                let month = other
                    .strip_prefix(Self::GITHUB_YFIUA_PREFIX)?
                    .parse::<YearMonth>()
                    .ok()?;
                Some(DataSource::GithubYfiua { month })
            }
        }
    }

    /// Returns the numeric priority of this source, from 1 to 5.
    ///
    /// NOTE(review): presumably the higher value wins when several sources
    /// cover the same data — confirm against the callers.
    pub fn priority(&self) -> u8 {
        match self {
            DataSource::IsharesCdn | DataSource::InvescoCdn | DataSource::SpdrCdn => 5,
            DataSource::GithubFja05680 => 4,
            DataSource::GithubYfiua { .. } => 3,
            DataSource::GithubHanshof => 3,
            DataSource::Wayback(_) => 2,
            DataSource::SecNport => 1,
        }
    }
}
/// Resolution marker with four levels; serde writes the variant names in
/// lowercase (`daily`, `sparse`, `monthly`, `none`).
///
/// NOTE(review): nothing else in this part of the file uses this type, so the
/// per-variant meanings below are read off the names only — confirm against
/// the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Resolution {
/// Presumably one data point per day.
Daily,
/// Presumably irregular / occasional data points.
Sparse,
/// Presumably one data point per month.
Monthly,
/// Presumably "no data". This is `Resolution::None`, unrelated to
/// `Option::None`; keep it path-qualified to avoid confusion.
None,
}
/// One member row of an index snapshot, together with the date and source it
/// was taken from.
///
/// `Eq` and `Hash` are not derived because the struct holds `f64` fields.
/// The derived `PartialEq` compares those floats with `==`, so two rows whose
/// `weight` is NaN never compare equal, even when every other field matches.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Constituent {
/// Ticker symbol, when one is known.
pub ticker: Option<String>,
/// Name of the holding; may be empty (the in-file test fixture leaves it empty).
pub name: String,
/// CUSIP string; may be empty (the in-file test fixture leaves it empty).
pub cusip: String,
/// LEI, when known. Presumably a Legal Entity Identifier — confirm.
pub lei: Option<String>,
/// Share count as a float.
pub shares: f64,
/// Market value; the field name indicates US dollars.
pub market_value_usd: f64,
/// Weight of this row. A non-finite value (NaN or infinity) means "no
/// weight available": `weight_opt` maps it to `None`.
/// NOTE(review): the unit (fraction vs. percent) is not established here.
pub weight: f64,
/// Issuer CIK, when known. Presumably the SEC Central Index Key — confirm.
pub issuer_cik: Option<String>,
/// Sector classification, when known.
pub sector: Option<Sector>,
/// Date the row is valid as of.
pub as_of: NaiveDate,
/// Source this row was obtained from.
pub source: DataSource,
}
impl Constituent {
    /// Returns the row's weight, or `None` when no usable weight is stored.
    ///
    /// A weight counts as usable when it is finite: NaN and both infinities
    /// yield `None`, every other value (zero included) comes back in `Some`.
    pub fn weight_opt(&self) -> Option<f64> {
        Some(self.weight).filter(|w| w.is_finite())
    }
}
/// Sector classification attached to a constituent.
///
/// No serde `rename_all` is applied here, so the variant identifiers are
/// serialized as written.
/// NOTE(review): the eleven variant names match the GICS sector names, so
/// this is presumably the GICS taxonomy — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Sector {
CommunicationServices,
ConsumerDiscretionary,
ConsumerStaples,
Energy,
Financials,
HealthCare,
Industrials,
InformationTechnology,
Materials,
RealEstate,
Utilities,
}
/// Constituent list of one index for one year-month.
///
/// Equality is derived, so it inherits the float comparison of `Constituent`:
/// a snapshot containing a row with a NaN `weight` does not compare equal to
/// itself.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexSnapshot {
/// Index the snapshot belongs to.
pub index: IndexId,
/// Month the snapshot covers.
pub year_month: YearMonth,
/// Member rows; may be empty.
pub constituents: Vec<Constituent>,
}
impl IndexSnapshot {
    /// Reports whether at least one constituent carries a finite weight.
    ///
    /// Returns `false` for an empty snapshot and for one whose weights are all
    /// NaN or infinite.
    pub fn has_weights(&self) -> bool {
        self.constituents
            .iter()
            .map(|row| row.weight)
            .any(f64::is_finite)
    }
}
/// Constituent list of one index for a single calendar date.
///
/// Unlike `IndexSnapshot`, this carries a snapshot-level `source` in addition
/// to the `source` stored on every constituent row.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DailySnapshot {
/// Index the snapshot belongs to.
pub index: IndexId,
/// Date the snapshot is for.
pub date: NaiveDate,
/// Member rows; may be empty.
pub constituents: Vec<Constituent>,
/// Source the snapshot as a whole was obtained from.
pub source: DataSource,
}
/// Identifier of a supported index.
///
/// serde writes the variant names in lowercase, which gives the same strings
/// that `IndexId::as_str` returns (`sp500`, `sp400`, `sp600`, `ndx`, `dji`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum IndexId {
/// Canonical id `sp500` (presumably the S&P 500 — confirm).
Sp500,
/// Canonical id `sp400` (presumably the S&P MidCap 400 — confirm).
Sp400,
/// Canonical id `sp600` (presumably the S&P SmallCap 600 — confirm).
Sp600,
/// Canonical id `ndx`; `from_str_id` also accepts `nasdaq100` and `nasdaq-100`.
Ndx,
/// Canonical id `dji`; `from_str_id` also accepts `djia` and `dow`.
Dji,
}
impl IndexId {
    /// Every index identifier, in declaration order.
    pub const ALL: [IndexId; 5] = [
        Self::Sp500,
        Self::Sp400,
        Self::Sp600,
        Self::Ndx,
        Self::Dji,
    ];

    /// Looks up an index by its canonical id or one of its aliases.
    ///
    /// Matching ignores ASCII case. Returns `None` for any other spelling;
    /// the input is not trimmed.
    pub fn from_str_id(s: &str) -> Option<Self> {
        // Every spelling is lowercase ASCII, so an ASCII-case-insensitive
        // comparison accepts exactly what lowercasing the input first would.
        const SPELLINGS: [(&str, IndexId); 9] = [
            ("sp500", IndexId::Sp500),
            ("sp400", IndexId::Sp400),
            ("sp600", IndexId::Sp600),
            ("ndx", IndexId::Ndx),
            ("nasdaq100", IndexId::Ndx),
            ("nasdaq-100", IndexId::Ndx),
            ("dji", IndexId::Dji),
            ("djia", IndexId::Dji),
            ("dow", IndexId::Dji),
        ];
        SPELLINGS
            .iter()
            .find(|entry| entry.0.eq_ignore_ascii_case(s))
            .map(|entry| entry.1)
    }

    /// Returns the canonical lowercase id, which `from_str_id` parses back to
    /// the same variant.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Sp500 => "sp500",
            Self::Sp400 => "sp400",
            Self::Sp600 => "sp600",
            Self::Ndx => "ndx",
            Self::Dji => "dji",
        }
    }
}
impl std::fmt::Display for IndexId {
    /// Writes the canonical id returned by `as_str`.
    ///
    /// Uses `Formatter::pad` rather than `write_str` so that width, fill and
    /// alignment flags (e.g. `{:>6}` in a column layout) are honoured instead
    /// of being silently ignored. Plain `{}` output is unchanged. A precision
    /// flag such as `{:.2}` truncates the id, as it does for any `&str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
impl std::str::FromStr for IndexId {
    /// Human-readable message that quotes the rejected input.
    type Err = String;

    /// Parses an index id through `IndexId::from_str_id`, turning a failed
    /// lookup into an error message that includes the `Debug` form of `s`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match IndexId::from_str_id(s) {
            Some(id) => Ok(id),
            None => Err(format!("unknown index id: {s:?}")),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Calendar date shared by all row-level fixtures.
    fn fixture_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()
    }

    /// The month-parameterised source, pinned to March 2024.
    fn yfiua_march_2024() -> DataSource {
        DataSource::GithubYfiua {
            month: YearMonth::new(2024, 3).unwrap(),
        }
    }

    /// Builds a row that carries only a ticker, a date and a source; every
    /// other field is empty or zero and the weight is NaN.
    fn ticker_only_row(ticker: &str, date: NaiveDate, src: DataSource) -> Constituent {
        Constituent {
            ticker: Some(ticker.into()),
            name: String::new(),
            cusip: String::new(),
            lei: None,
            shares: 0.0,
            market_value_usd: 0.0,
            weight: f64::NAN,
            issuer_cik: None,
            sector: None,
            as_of: date,
            source: src,
        }
    }

    /// Wraps `rows` in an S&P 500 snapshot for January 2024.
    fn sp500_jan_2024(rows: Vec<Constituent>) -> IndexSnapshot {
        IndexSnapshot {
            index: IndexId::Sp500,
            year_month: YearMonth::new(2024, 1).unwrap(),
            constituents: rows,
        }
    }

    #[test]
    fn indexid_roundtrip() {
        for id in IndexId::ALL.iter().copied() {
            assert_eq!(IndexId::from_str_id(id.as_str()), Some(id));
        }
    }

    #[test]
    fn indexid_aliases() {
        let cases = [
            ("nasdaq100", IndexId::Ndx),
            ("djia", IndexId::Dji),
            ("SP500", IndexId::Sp500),
        ];
        for (spelling, expected) in cases.iter() {
            assert_eq!(IndexId::from_str_id(spelling), Some(*expected));
        }
    }

    #[test]
    fn indexid_unknown() {
        assert_eq!(IndexId::from_str_id("totally-fake"), None);
    }

    #[test]
    fn data_source_tag_roundtrip_core() {
        let sources = vec![
            DataSource::IsharesCdn,
            DataSource::InvescoCdn,
            DataSource::SpdrCdn,
            DataSource::SecNport,
            DataSource::GithubFja05680,
            DataSource::GithubHanshof,
            DataSource::Wayback("20240315".into()),
            yfiua_march_2024(),
        ];
        for original in sources {
            let tag = original.tag();
            let parsed = DataSource::from_tag(&tag).expect("parseable");
            assert_eq!(parsed, original, "tag {tag} did not round-trip");
        }
    }

    #[test]
    fn data_source_priority_ladder() {
        let ladder = vec![
            (DataSource::IsharesCdn, 5u8),
            (DataSource::InvescoCdn, 5),
            (DataSource::SpdrCdn, 5),
            (DataSource::GithubFja05680, 4),
            (yfiua_march_2024(), 3),
            (DataSource::GithubHanshof, 3),
            (DataSource::Wayback("20240315".into()), 2),
            (DataSource::SecNport, 1),
        ];
        for (source, rank) in ladder {
            assert_eq!(source.priority(), rank);
        }
    }

    #[test]
    fn weight_opt_nan_is_none() {
        let row = ticker_only_row("AAPL", fixture_date(), DataSource::GithubFja05680);
        assert_eq!(row.weight_opt(), None);
    }

    #[test]
    fn weight_opt_finite_is_some() {
        let row = Constituent {
            weight: 0.072,
            ..ticker_only_row("AAPL", fixture_date(), DataSource::IsharesCdn)
        };
        assert_eq!(row.weight_opt(), Some(0.072));
    }

    #[test]
    fn weight_opt_infinity_is_none() {
        let row = Constituent {
            weight: f64::INFINITY,
            ..ticker_only_row("AAPL", fixture_date(), DataSource::IsharesCdn)
        };
        assert_eq!(row.weight_opt(), None);
    }

    #[test]
    fn snapshot_has_weights_true_when_any_finite() {
        let day = fixture_date();
        let weighted = Constituent {
            weight: 0.05,
            ..ticker_only_row("AAPL", day, DataSource::IsharesCdn)
        };
        let unweighted = ticker_only_row("MSFT", day, DataSource::GithubFja05680);
        let snapshot = sp500_jan_2024(vec![weighted, unweighted]);
        assert!(snapshot.has_weights());
    }

    #[test]
    fn snapshot_has_weights_false_when_all_nan() {
        let day = fixture_date();
        let snapshot = sp500_jan_2024(vec![
            ticker_only_row("AAPL", day, DataSource::GithubFja05680),
            ticker_only_row("MSFT", day, DataSource::GithubHanshof),
        ]);
        assert!(!snapshot.has_weights());
    }

    #[test]
    fn snapshot_has_weights_false_when_empty() {
        let snapshot = sp500_jan_2024(Vec::new());
        assert!(!snapshot.has_weights());
    }
}