/// A reporting-period label recognised by [`parse_period_slot`].
///
/// Every variant maps onto a quarter rank (1-4) via
/// [`PeriodSlot::normalized_quarter`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PeriodSlot {
    // Quarter labels (`Q1` .. `Q4`).
    Q1,
    Q2,
    Q3,
    Q4,
    // Full-year label (`FY` / `ANNUAL`).
    FY,
    // Semi-annual labels (`H1`/`HY1`/`SA1`/`S1` and `H2`/`HY2`/`SA2`/`S2`).
    H1,
    H2,
    // Month-window labels (`3M`, `6M`, `9M`, `12M`).
    M3,
    M6,
    M9,
    M12,
}
impl PeriodSlot {
pub fn normalized_quarter(self) -> i64 {
match self {
PeriodSlot::Q1 | PeriodSlot::M3 => 1,
PeriodSlot::Q2 | PeriodSlot::H1 | PeriodSlot::M6 => 2,
PeriodSlot::Q3 | PeriodSlot::M9 => 3,
PeriodSlot::Q4 | PeriodSlot::FY | PeriodSlot::H2 | PeriodSlot::M12 => 4,
}
}
}
/// Extracts a quarter number from text such as `Q3` or `2024q3`.
///
/// The input is upper-cased, then the character immediately after the
/// *first* `Q` is read as a decimal digit. Returns `Some(1..=4)` for a valid
/// quarter and `None` when there is no `Q`, when the following character is
/// not a digit, or when the digit is outside `1..=4`.
pub fn parse_quarter_token(s: &str) -> Option<i64> {
    let upper = s.to_ascii_uppercase();
    // Only the first `Q` is considered; later occurrences are never inspected.
    let (_, after_q) = upper.split_once('Q')?;
    let digit = after_q.chars().next()?.to_digit(10)?;
    match digit {
        1..=4 => Some(i64::from(digit)),
        _ => None,
    }
}
/// Recognises a period label inside `s` and returns the matching
/// [`PeriodSlot`].
///
/// The input is trimmed and upper-cased, then checked in this order (the
/// first hit wins):
///
/// 1. a quarter token accepted by [`parse_quarter_token`] (`Q1`..`Q4`);
/// 2. `FY` or `ANNUAL`;
/// 3. `HY1`, `H1`, `SA1` or `S1`;
/// 4. `HY2`, `H2`, `SA2` or `S2`;
/// 5. `12M`, then `9M`, then `6M`, then `3M`.
///
/// Steps 2-5 are plain substring searches, so the marker may appear anywhere
/// in the text. Returns `None` when nothing matches.
pub fn parse_period_slot(s: &str) -> Option<PeriodSlot> {
    // Ordered marker table: earlier rows take precedence over later ones.
    const MARKERS: &[(&[&str], PeriodSlot)] = &[
        (&["FY", "ANNUAL"], PeriodSlot::FY),
        (&["HY1", "H1", "SA1", "S1"], PeriodSlot::H1),
        (&["HY2", "H2", "SA2", "S2"], PeriodSlot::H2),
        (&["12M"], PeriodSlot::M12),
        (&["9M"], PeriodSlot::M9),
        (&["6M"], PeriodSlot::M6),
        (&["3M"], PeriodSlot::M3),
    ];

    let token = s.trim().to_ascii_uppercase();

    // Quarter labels are tried before any of the substring markers.
    if let Some(quarter) = parse_quarter_token(&token) {
        return match quarter {
            1 => Some(PeriodSlot::Q1),
            2 => Some(PeriodSlot::Q2),
            3 => Some(PeriodSlot::Q3),
            4 => Some(PeriodSlot::Q4),
            _ => None,
        };
    }

    for (needles, slot) in MARKERS {
        if needles.iter().any(|needle| token.contains(*needle)) {
            return Some(*slot);
        }
    }
    None
}
pub fn parse_period_slot_token(s: &str) -> Option<i64> {
parse_period_slot(s).map(PeriodSlot::normalized_quarter)
}
/// Rewrites a `Q4` fiscal-period label to `FY`.
///
/// The comparison ignores ASCII case and surrounding whitespace. Any other
/// label is returned exactly as given (it is *not* trimmed).
pub fn normalize_fp_label(fp: &str) -> String {
    let label = match fp.trim() {
        trimmed if trimmed.eq_ignore_ascii_case("Q4") => "FY",
        _ => fp,
    };
    label.to_owned()
}
/// Builds the candidate spellings of a ticker symbol.
///
/// The symbol is upper-cased, and two extra variants are derived from it: one
/// with every `.` replaced by `-` and one with every `-` replaced by `.`.
/// The result holds the distinct spellings in ascending order, so a symbol
/// without either separator yields a single entry.
pub fn normalize_symbol(symbol: &str) -> Vec<String> {
    let base = symbol.to_ascii_uppercase();
    let mut variants = vec![base.replace('.', "-"), base.replace('-', "."), base];
    // Sorting first makes equal spellings adjacent so `dedup` removes them all.
    variants.sort();
    variants.dedup();
    variants
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every `(token, expected)` pair against `parse_period_slot_token`.
    fn check_slot_tokens(cases: &[(&str, Option<i64>)]) {
        for &(token, expected) in cases {
            assert_eq!(parse_period_slot_token(token), expected);
        }
    }

    /// Checks that `symbol` expands to a list containing both BRK spellings
    /// and hands the list back for any further assertions.
    fn check_brk_variants(symbol: &str) -> Vec<String> {
        let candidates = normalize_symbol(symbol);
        assert!(candidates.iter().any(|c| c == "BRK.B"));
        assert!(candidates.iter().any(|c| c == "BRK-B"));
        candidates
    }

    /// Shorthand for building a `Period::YearQuarter`.
    fn year_quarter(year: i64, quarter: i64) -> Period {
        Period::YearQuarter { year, quarter }
    }

    /// Checks that parsing `input` fails with a "missing year" message.
    fn check_missing_year(input: &str) {
        match parse_period(input) {
            Err(message) => assert!(message.contains("missing year")),
            Ok(parsed) => panic!("expected an error, got {:?}", parsed),
        }
    }

    // ---- period slot tokens ----

    #[test]
    fn quarters_rank_correctly() {
        check_slot_tokens(&[
            ("Q1", Some(1)),
            ("Q2", Some(2)),
            ("Q3", Some(3)),
            ("Q4", Some(4)),
        ]);
    }

    #[test]
    fn fy_ranks_same_as_q4() {
        check_slot_tokens(&[("FY", Some(4))]);
    }

    #[test]
    fn semi_annual_aliases() {
        check_slot_tokens(&[
            ("H1", Some(2)),
            ("H2", Some(4)),
            ("HY1", Some(2)),
            ("HY2", Some(4)),
            ("SA1", Some(2)),
            ("SA2", Some(4)),
            ("S1", Some(2)),
            ("S2", Some(4)),
        ]);
    }

    #[test]
    fn month_window_aliases() {
        check_slot_tokens(&[
            ("3M", Some(1)),
            ("6M", Some(2)),
            ("9M", Some(3)),
            ("12M", Some(4)),
        ]);
    }

    #[test]
    fn unrecognised_returns_none() {
        check_slot_tokens(&[("", None), ("SA", None), ("Q5", None)]);
    }

    #[test]
    fn case_insensitive() {
        check_slot_tokens(&[("fy", Some(4)), ("q2", Some(2)), ("sa2", Some(4))]);
    }

    // ---- fiscal-period labels ----

    #[test]
    fn normalize_fp_label_maps_q4_to_fy() {
        for label in ["Q4", "q4"].iter() {
            assert_eq!(normalize_fp_label(label), "FY");
        }
    }

    #[test]
    fn normalize_fp_label_leaves_other_tokens_unchanged() {
        for label in ["FY", "Q3", "H1", "SA2", ""].iter() {
            assert_eq!(normalize_fp_label(label), *label);
        }
    }

    // ---- ticker symbols ----

    #[test]
    fn normalize_symbol_generates_dot_and_dash_variants() {
        check_brk_variants("brk.b");
    }

    #[test]
    fn normalize_symbol_upcases_plain_ticker() {
        assert_eq!(normalize_symbol("aapl"), ["AAPL"]);
    }

    // ---- year extraction ----

    #[test]
    fn test_extract_first_year_from_combined() {
        assert_eq!(Some(2024), extract_first_year("2024Q3"));
    }

    #[test]
    fn test_extract_first_year_from_plain_year() {
        assert_eq!(Some(2024), extract_first_year("2024"));
    }

    #[test]
    fn test_extract_first_year_from_longer_string() {
        assert_eq!(Some(2024), extract_first_year("FY ended 2024-12-31"));
    }

    #[test]
    fn test_extract_first_year_out_of_range_low() {
        assert!(extract_first_year("1899").is_none());
    }

    #[test]
    fn test_extract_first_year_out_of_range_high() {
        assert!(extract_first_year("2101").is_none());
    }

    #[test]
    fn test_extract_first_year_no_digits() {
        assert!(extract_first_year("hello world").is_none());
    }

    #[test]
    fn test_extract_first_year_short_string() {
        assert!(extract_first_year("23").is_none());
    }

    #[test]
    fn test_extract_first_year_empty_string() {
        assert!(extract_first_year("").is_none());
    }

    // ---- full period parsing ----

    #[test]
    fn test_parse_period_year_only() {
        assert_eq!(parse_period("2024"), Ok(Period::Year { year: 2024 }));
    }

    #[test]
    fn test_parse_period_year_quarter() {
        assert_eq!(parse_period("2024Q3"), Ok(year_quarter(2024, 3)));
    }

    #[test]
    fn test_parse_period_year_fy() {
        assert_eq!(parse_period("2024FY"), Ok(year_quarter(2024, 4)));
    }

    #[test]
    fn test_parse_period_year_h1() {
        assert_eq!(parse_period("2024H1"), Ok(year_quarter(2024, 2)));
    }

    #[test]
    fn test_parse_period_year_9m() {
        assert_eq!(parse_period("2024 9M"), Ok(year_quarter(2024, 3)));
    }

    #[test]
    fn test_parse_period_whitespace_trimmed() {
        assert_eq!(parse_period(" 2024Q2 "), Ok(year_quarter(2024, 2)));
    }

    #[test]
    fn test_parse_period_missing_year() {
        check_missing_year("Q3");
    }

    #[test]
    fn test_parse_period_empty_string() {
        check_missing_year("");
    }

    // ---- additional symbol / label coverage ----

    #[test]
    fn test_normalize_symbol_dash_to_dot() {
        check_brk_variants("BRK-B");
    }

    #[test]
    fn test_normalize_symbol_no_change_needed() {
        assert_eq!(normalize_symbol("AAPL"), ["AAPL"]);
    }

    #[test]
    fn test_normalize_symbol_dot_and_dash_both_present() {
        let candidates = check_brk_variants("brk.b");
        assert_eq!(candidates.len(), 2);
    }

    #[test]
    fn test_normalize_fp_label_q4_case_insensitive() {
        for label in ["Q4", "q4", " Q4 "].iter() {
            assert_eq!(normalize_fp_label(label), "FY");
        }
    }
}
/// A parsed reporting period, as produced by [`parse_period`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Period {
    /// A year together with a quarter rank; `parse_period` fills `quarter`
    /// from `PeriodSlot::normalized_quarter`, i.e. a value in `1..=4`.
    YearQuarter { year: i64, quarter: i64 },
    /// A year with no recognisable period label.
    Year { year: i64 },
}
/// Finds the first plausible four-digit year in `s`.
///
/// Every run of four consecutive ASCII digits is a candidate, scanned left to
/// right; windows overlap, so digits inside a longer number are considered
/// too. The first candidate whose value lies in `1900..=2100` is returned.
/// Returns `None` when the text is shorter than four bytes or holds no such
/// candidate.
pub fn extract_first_year(s: &str) -> Option<i64> {
    // Working on bytes avoids allocating a `Vec<char>` and a `String` per
    // candidate. It is equivalent to scanning chars: ASCII digits are always
    // single bytes, and no byte of a multi-byte UTF-8 sequence is an ASCII
    // digit, so byte windows and char windows agree on every match.
    s.as_bytes()
        .windows(4)
        .filter(|window| window.iter().all(u8::is_ascii_digit))
        .map(|window| {
            // Every byte is in b'0'..=b'9' here, so the subtraction cannot underflow.
            window
                .iter()
                .fold(0i64, |acc, &digit| acc * 10 + i64::from(digit - b'0'))
        })
        .find(|year| (1900..=2100).contains(year))
}
/// Parses a period string such as `2024Q3`, `2024H1`, `2024FY` or `2024`.
///
/// The input is trimmed and upper-cased. A year is located with
/// [`extract_first_year`]; if a period label is also recognised by
/// [`parse_period_slot`] the result is `Period::YearQuarter` carrying the
/// label's quarter rank, otherwise it is `Period::Year`.
///
/// # Errors
///
/// Returns a message naming the trimmed input when no year can be found.
pub fn parse_period(period: &str) -> Result<Period, String> {
    let trimmed = period.trim();
    let normalized = trimmed.to_ascii_uppercase();

    let year = match extract_first_year(&normalized) {
        Some(found) => found,
        None => {
            // The message echoes the trimmed input in its original case.
            return Err(format!(
                "Period `{}` is missing year; expected values like 2024Q3, 2024H1, or 2024FY",
                trimmed
            ));
        }
    };

    Ok(match parse_period_slot(&normalized) {
        Some(slot) => Period::YearQuarter {
            year,
            quarter: slot.normalized_quarter(),
        },
        None => Period::Year { year },
    })
}
/// Fixed, ordered list of metadata column names.
///
/// NOTE(review): judging by the name these are the non-fact columns of a
/// US-GAAP CSV export; the consumer is not visible in this file, so confirm
/// before relying on the order.
pub const US_GAAP_CSV_META_COLUMNS: &[&str] = &[
    "canonical_order",
    "fy",
    "fp",
    "period_end",
    "filed",
    "form",
    "is_amendment",
    "accn",
    "filing_url",
];