use serde::Deserialize;
/// One flattened fact, as produced by [`parse_company_facts`].
///
/// Optional pieces of the source record are represented with sentinel values
/// (empty string / `0`) rather than `Option`.
#[derive(Debug, Clone, PartialEq)]
pub struct XbrlFact {
/// Qualified concept name in the form `taxonomy:concept`, e.g. `us-gaap:Revenues`.
pub tag: String,
/// Key of the `units` map the fact was listed under, e.g. `USD` or `shares`.
pub unit: String,
/// The record's `val`; `0.0` when the record has no `val` key.
pub value: f64,
/// The record's `start`; empty when the record has no `start` key.
pub period_start: String,
/// The record's `end`; empty when the record has no `end` key.
pub period_end: String,
/// The record's `fy`; `0` when it is missing.
pub fiscal_year: i64,
/// The record's `fp` (e.g. `FY`, `Q4`); empty when it is missing.
pub fiscal_period: String,
/// The record's `form` (e.g. `10-K`); empty when the record has no `form` key.
pub form: String,
/// The record's `accn`; empty when the record has no `accn` key.
pub accession: String,
/// The record's `frame` (e.g. `CY2023`); empty when the record has no `frame` key.
pub frame: String,
}
/// Two-level lookup of concepts: the outer key is the taxonomy name (e.g. `us-gaap`),
/// the inner key is the concept name (e.g. `Revenues`).
type FactsMap = std::collections::HashMap<String, std::collections::HashMap<String, Concept>>;
/// Top level of the companyfacts JSON document; only the `facts` key is read.
#[derive(Deserialize)]
struct CompanyFacts {
/// Concepts grouped by taxonomy; an absent `facts` key deserializes to an empty map.
#[serde(default)]
facts: FactsMap,
}
/// A single concept entry; only its `units` key is read.
#[derive(Deserialize)]
struct Concept {
/// Reported facts grouped by unit name; an absent `units` key deserializes to an empty map.
#[serde(default)]
units: std::collections::HashMap<String, Vec<RawFact>>,
}
/// One fact record exactly as it appears inside a `units` array.
///
/// Every field is `#[serde(default)]`, so a record may omit any of these keys.
#[derive(Deserialize)]
struct RawFact {
/// Period start; empty when the key is absent (the test fixture's `Assets` record has none).
#[serde(default)]
start: String,
/// Period end; empty when the key is absent.
#[serde(default)]
end: String,
/// Reported value; `0.0` when the key is absent.
#[serde(default)]
val: f64,
/// Fiscal year; `None` when the key is absent.
#[serde(default)]
fy: Option<i64>,
/// Fiscal period label such as `FY` or `Q4`; `None` when the key is absent.
#[serde(default)]
fp: Option<String>,
/// Form identifier such as `10-K`; empty when the key is absent.
#[serde(default)]
form: String,
/// Copied into `XbrlFact::accession`; empty when the key is absent.
#[serde(default)]
accn: String,
/// Frame label such as `CY2023`; empty when the key is absent.
#[serde(default)]
frame: String,
}
pub fn parse_company_facts(json: &str, tag_whitelist: &[&str]) -> Result<Vec<XbrlFact>, String> {
let parsed: CompanyFacts =
serde_json::from_str(json).map_err(|e| format!("companyfacts JSON: {e}"))?;
let mut out = Vec::new();
for (taxonomy, concepts) in &parsed.facts {
for (concept, c) in concepts {
if !tag_whitelist.is_empty() && !tag_whitelist.contains(&concept.as_str()) {
continue;
}
let tag = format!("{taxonomy}:{concept}");
for (unit, facts) in &c.units {
for f in facts {
out.push(XbrlFact {
tag: tag.clone(),
unit: unit.clone(),
value: f.val,
period_start: f.start.clone(),
period_end: f.end.clone(),
fiscal_year: f.fy.unwrap_or(0),
fiscal_period: f.fp.clone().unwrap_or_default(),
form: f.form.clone(),
accession: f.accn.clone(),
frame: f.frame.clone(),
});
}
}
}
}
Ok(out)
}
/// Bare concept names (no taxonomy prefix), which is the form [`parse_company_facts`]
/// compares its `tag_whitelist` argument against.
///
/// NOTE(review): presumably intended as the default whitelist of headline
/// financial-statement concepts — confirm against callers.
pub const DEFAULT_FINANCIAL_TAGS: &[&str] = &[
"Revenues",
"RevenueFromContractWithCustomerExcludingAssessedTax",
"CostOfRevenue",
"GrossProfit",
"OperatingIncomeLoss",
"NetIncomeLoss",
"EarningsPerShareBasic",
"EarningsPerShareDiluted",
"ResearchAndDevelopmentExpense",
"SellingGeneralAndAdministrativeExpense",
"Assets",
"AssetsCurrent",
"Liabilities",
"LiabilitiesCurrent",
"StockholdersEquity",
"CashAndCashEquivalentsAtCarryingValue",
"LongTermDebtNoncurrent",
"RetainedEarningsAccumulatedDeficit",
"NetCashProvidedByUsedInOperatingActivities",
"NetCashProvidedByUsedInInvestingActivities",
"NetCashProvidedByUsedInFinancingActivities",
"CommonStockSharesOutstanding",
"WeightedAverageNumberOfSharesOutstandingBasic",
"WeightedAverageNumberOfDilutedSharesOutstanding",
];
// Unit tests for `parse_company_facts`, all driven by the single `SAMPLE` fixture.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: two taxonomies (`us-gaap`, `dei`), four concepts, five records in total.
// The second `Revenues` record has no `frame`, the `Assets` record has no `start`,
// and the `SomeNicheConcept` and `dei` records carry only `end`/`val`/`fy`/`fp`.
const SAMPLE: &str = r#"{
"cik": 320193,
"entityName": "Apple Inc.",
"facts": {
"us-gaap": {
"Revenues": {
"label": "Revenues",
"units": {
"USD": [
{"start": "2022-10-01", "end": "2023-09-30", "val": 383285000000,
"fy": 2023, "fp": "FY", "form": "10-K",
"accn": "0000320193-23-000106", "frame": "CY2023"},
{"start": "2023-07-01", "end": "2023-09-30", "val": 89498000000,
"fy": 2023, "fp": "Q4", "form": "10-K",
"accn": "0000320193-23-000106"}
]
}
},
"Assets": {
"label": "Assets",
"units": {
"USD": [
{"end": "2023-09-30", "val": 352583000000,
"fy": 2023, "fp": "FY", "form": "10-K",
"accn": "0000320193-23-000106", "frame": "CY2023Q3I"}
]
}
},
"SomeNicheConcept": {
"label": "Niche",
"units": {"USD": [{"end": "2023-09-30", "val": 1, "fy": 2023, "fp": "FY"}]}
}
},
"dei": {
"EntityCommonStockSharesOutstanding": {
"units": {"shares": [{"end": "2023-10-20", "val": 15552752000, "fy": 2023, "fp": "FY"}]}
}
}
}
}"#;
// An empty whitelist disables filtering: 2 Revenues + 1 Assets + 1 niche + 1 dei = 5.
#[test]
fn parses_all_facts_with_empty_whitelist() {
let facts = parse_company_facts(SAMPLE, &[]).unwrap();
assert_eq!(facts.len(), 5);
}
// A non-empty whitelist keeps only the named concepts: 2 Revenues + 1 Assets = 3.
#[test]
fn whitelist_filters_to_headline_concepts() {
let facts = parse_company_facts(SAMPLE, &["Revenues", "Assets"]).unwrap();
assert_eq!(facts.len(), 3);
assert!(facts
.iter()
.all(|f| f.tag.ends_with("Revenues") || f.tag.ends_with("Assets")));
}
// Every field of the FY Revenues record is carried over, and the tag is taxonomy-qualified.
#[test]
fn captures_period_and_provenance_fields() {
let facts = parse_company_facts(SAMPLE, &["Revenues"]).unwrap();
// Looked up by fiscal period rather than by index, so the test does not depend on output order.
let annual = facts
.iter()
.find(|f| f.fiscal_period == "FY")
.expect("FY revenue fact");
assert_eq!(annual.value, 383285000000.0);
assert_eq!(annual.period_start, "2022-10-01");
assert_eq!(annual.period_end, "2023-09-30");
assert_eq!(annual.form, "10-K");
assert_eq!(annual.accession, "0000320193-23-000106");
assert_eq!(annual.frame, "CY2023");
assert_eq!(annual.tag, "us-gaap:Revenues");
}
// A record without a `start` key yields an empty `period_start`.
#[test]
fn instant_facts_have_empty_start() {
let facts = parse_company_facts(SAMPLE, &["Assets"]).unwrap();
assert_eq!(facts.len(), 1);
assert_eq!(facts[0].period_start, "");
assert_eq!(facts[0].period_end, "2023-09-30");
}
// Input that is not valid JSON is reported as `Err` rather than panicking.
#[test]
fn malformed_json_errors() {
assert!(parse_company_facts("{not json", &[]).is_err());
}
}