/// Sub-annual (or annual) reporting slot recognised by `parse_period_slot`.
///
/// Every variant collapses onto a quarter rank between 1 and 4 via
/// [`PeriodSlot::normalized_quarter`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PeriodSlot {
    /// Quarter token `Q1`.
    Q1,
    /// Quarter token `Q2`.
    Q2,
    /// Quarter token `Q3`.
    Q3,
    /// Quarter token `Q4`.
    Q4,
    /// Full-year token (`FY` / `ANNUAL`).
    FY,
    /// First semi-annual token (`H1`, `HY1`, `SA1`, `S1`).
    H1,
    /// Second semi-annual token (`H2`, `HY2`, `SA2`, `S2`).
    H2,
    /// Month-window token `3M`.
    M3,
    /// Month-window token `6M`.
    M6,
    /// Month-window token `9M`.
    M9,
    /// Month-window token `12M`.
    M12,
}

impl PeriodSlot {
    /// Returns the quarter rank (1 through 4) this slot is treated as.
    ///
    /// Slots that share a rank are interchangeable for ordering purposes:
    /// `M3` ranks with `Q1`, `H1`/`M6` with `Q2`, `M9` with `Q3`, and
    /// `FY`/`H2`/`M12` with `Q4`.
    pub fn normalized_quarter(self) -> i64 {
        use PeriodSlot::*;

        match self {
            Q4 | FY | H2 | M12 => 4,
            Q3 | M9 => 3,
            Q2 | H1 | M6 => 2,
            Q1 | M3 => 1,
        }
    }
}
61
/// Extracts a quarter number from the first `Q<digit>` marker in `s`.
///
/// The search is ASCII case-insensitive. Only the *first* `Q`/`q` is
/// considered, and only the single character right after it is inspected
/// (so `"Q12"` yields `1`).
///
/// Returns `Some(1..=4)` on success; `None` when there is no `Q`, when the
/// following character is missing or not a decimal digit, or when the digit
/// is outside `1..=4`.
pub fn parse_quarter_token(s: &str) -> Option<i64> {
    // Skip everything up to (not including) the first Q/q.
    let mut tail = s.chars().skip_while(|c| !c.eq_ignore_ascii_case(&'Q'));

    // Consume the marker itself; bails out when the input has no Q at all.
    tail.next()?;

    let digit = tail.next()?.to_digit(10)?;
    match digit {
        1..=4 => Some(i64::from(digit)),
        _ => None,
    }
}
74
75pub fn parse_period_slot(s: &str) -> Option<PeriodSlot> {
84 let upper = s.trim().to_ascii_uppercase();
85
86 if let Some(q) = parse_quarter_token(&upper) {
87 return Some(match q {
88 1 => PeriodSlot::Q1,
89 2 => PeriodSlot::Q2,
90 3 => PeriodSlot::Q3,
91 4 => PeriodSlot::Q4,
92 _ => return None,
93 });
94 }
95
96 if upper.contains("FY") || upper.contains("ANNUAL") {
97 return Some(PeriodSlot::FY);
98 }
99
100 if upper.contains("HY1")
102 || upper.contains("H1")
103 || upper.contains("SA1")
104 || upper.contains("S1")
105 {
106 return Some(PeriodSlot::H1);
107 }
108 if upper.contains("HY2")
109 || upper.contains("H2")
110 || upper.contains("SA2")
111 || upper.contains("S2")
112 {
113 return Some(PeriodSlot::H2);
114 }
115
116 if upper.contains("12M") {
118 return Some(PeriodSlot::M12);
119 }
120 if upper.contains("9M") {
121 return Some(PeriodSlot::M9);
122 }
123 if upper.contains("6M") {
124 return Some(PeriodSlot::M6);
125 }
126 if upper.contains("3M") {
127 return Some(PeriodSlot::M3);
128 }
129
130 None
131}
132
133pub fn parse_period_slot_token(s: &str) -> Option<i64> {
149 parse_period_slot(s).map(PeriodSlot::normalized_quarter)
150}
151
/// Rewrites the fiscal-period label `Q4` to `FY`.
///
/// The comparison ignores surrounding whitespace and ASCII case. Any other
/// label is returned exactly as given, including whitespace it may carry.
pub fn normalize_fp_label(fp: &str) -> String {
    match fp.trim() {
        label if label.eq_ignore_ascii_case("Q4") => String::from("FY"),
        _ => fp.to_owned(),
    }
}
178
/// Builds the candidate spellings of a ticker symbol.
///
/// The symbol is upper-cased (ASCII) and three spellings are produced: the
/// upper-cased original, one with every `.` replaced by `-`, and one with
/// every `-` replaced by `.`. Duplicates are removed and the result is
/// returned in ascending lexicographic order.
pub fn normalize_symbol(symbol: &str) -> Vec<String> {
    let base = symbol.to_ascii_uppercase();

    let mut variants = vec![base.replace('.', "-"), base.replace('-', "."), base];
    // Sorting first makes equal spellings adjacent, so `dedup` removes them all.
    variants.sort();
    variants.dedup();
    variants
}
206
// Unit tests for the period-token, fiscal-label, symbol, year-extraction and
// period-parsing helpers defined in the parent module.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_period_slot_token -------------------------------------------

    // Plain quarter tokens map to their own number.
    #[test]
    fn quarters_rank_correctly() {
        assert_eq!(parse_period_slot_token("Q1"), Some(1));
        assert_eq!(parse_period_slot_token("Q2"), Some(2));
        assert_eq!(parse_period_slot_token("Q3"), Some(3));
        assert_eq!(parse_period_slot_token("Q4"), Some(4));
    }

    // The full-year token shares rank 4 with Q4.
    #[test]
    fn fy_ranks_same_as_q4() {
        assert_eq!(parse_period_slot_token("FY"), Some(4));
    }

    // Every first-half spelling ranks as 2, every second-half spelling as 4.
    #[test]
    fn semi_annual_aliases() {
        assert_eq!(parse_period_slot_token("H1"), Some(2));
        assert_eq!(parse_period_slot_token("H2"), Some(4));
        assert_eq!(parse_period_slot_token("HY1"), Some(2));
        assert_eq!(parse_period_slot_token("HY2"), Some(4));
        assert_eq!(parse_period_slot_token("SA1"), Some(2));
        assert_eq!(parse_period_slot_token("SA2"), Some(4));
        assert_eq!(parse_period_slot_token("S1"), Some(2));
        assert_eq!(parse_period_slot_token("S2"), Some(4));
    }

    // Month-window tokens rank as the quarter in which the window ends.
    #[test]
    fn month_window_aliases() {
        assert_eq!(parse_period_slot_token("3M"), Some(1));
        assert_eq!(parse_period_slot_token("6M"), Some(2));
        assert_eq!(parse_period_slot_token("9M"), Some(3));
        assert_eq!(parse_period_slot_token("12M"), Some(4));
    }

    // Empty input, a half-year prefix without its digit, and an out-of-range
    // quarter digit are all rejected.
    #[test]
    fn unrecognised_returns_none() {
        assert_eq!(parse_period_slot_token(""), None);
        assert_eq!(parse_period_slot_token("SA"), None);
        assert_eq!(parse_period_slot_token("Q5"), None);
    }

    // Lower-case tokens are accepted.
    #[test]
    fn case_insensitive() {
        assert_eq!(parse_period_slot_token("fy"), Some(4));
        assert_eq!(parse_period_slot_token("q2"), Some(2));
        assert_eq!(parse_period_slot_token("sa2"), Some(4));
    }

    // ---- normalize_fp_label ------------------------------------------------

    // Q4 (in either case) is relabelled as FY.
    #[test]
    fn normalize_fp_label_maps_q4_to_fy() {
        assert_eq!(normalize_fp_label("Q4"), "FY");
        assert_eq!(normalize_fp_label("q4"), "FY");
    }

    // Anything other than Q4 passes through untouched.
    #[test]
    fn normalize_fp_label_leaves_other_tokens_unchanged() {
        assert_eq!(normalize_fp_label("FY"), "FY");
        assert_eq!(normalize_fp_label("Q3"), "Q3");
        assert_eq!(normalize_fp_label("H1"), "H1");
        assert_eq!(normalize_fp_label("SA2"), "SA2");
        assert_eq!(normalize_fp_label(""), "");
    }

    // ---- normalize_symbol --------------------------------------------------

    // A dotted symbol yields both the dotted and the dashed spelling.
    #[test]
    fn normalize_symbol_generates_dot_and_dash_variants() {
        let c = normalize_symbol("brk.b");
        assert!(c.contains(&"BRK.B".to_string()));
        assert!(c.contains(&"BRK-B".to_string()));
    }

    // A symbol without separators collapses to one upper-cased entry.
    #[test]
    fn normalize_symbol_upcases_plain_ticker() {
        let c = normalize_symbol("aapl");
        assert_eq!(c, vec!["AAPL".to_string()]);
    }

    // ---- extract_first_year ------------------------------------------------

    // The year is found when a period token follows it directly.
    #[test]
    fn test_extract_first_year_from_combined() {
        assert_eq!(extract_first_year("2024Q3"), Some(2024));
    }

    // A bare four-digit year is returned as-is.
    #[test]
    fn test_extract_first_year_from_plain_year() {
        assert_eq!(extract_first_year("2024"), Some(2024));
    }

    // The year is found inside surrounding text.
    #[test]
    fn test_extract_first_year_from_longer_string() {
        assert_eq!(extract_first_year("FY ended 2024-12-31"), Some(2024));
    }

    // 1899 is just below the accepted range (1900..=2100).
    #[test]
    fn test_extract_first_year_out_of_range_low() {
        assert_eq!(extract_first_year("1899"), None);
    }

    // 2101 is just above the accepted range (1900..=2100).
    #[test]
    fn test_extract_first_year_out_of_range_high() {
        assert_eq!(extract_first_year("2101"), None);
    }

    // Text without digits has no year.
    #[test]
    fn test_extract_first_year_no_digits() {
        assert_eq!(extract_first_year("hello world"), None);
    }

    // Fewer than four characters can never contain a year.
    #[test]
    fn test_extract_first_year_short_string() {
        assert_eq!(extract_first_year("23"), None);
    }

    // The empty string has no year.
    #[test]
    fn test_extract_first_year_empty_string() {
        assert_eq!(extract_first_year(""), None);
    }

    // ---- parse_period ------------------------------------------------------

    // A year with no slot token parses to `Period::Year`.
    #[test]
    fn test_parse_period_year_only() {
        assert_eq!(parse_period("2024").unwrap(), Period::Year { year: 2024 });
    }

    // Year followed by a quarter token.
    #[test]
    fn test_parse_period_year_quarter() {
        assert_eq!(
            parse_period("2024Q3").unwrap(),
            Period::YearQuarter {
                year: 2024,
                quarter: 3
            }
        );
    }

    // FY is reported as quarter 4.
    #[test]
    fn test_parse_period_year_fy() {
        assert_eq!(
            parse_period("2024FY").unwrap(),
            Period::YearQuarter {
                year: 2024,
                quarter: 4
            }
        );
    }

    // H1 is reported as quarter 2.
    #[test]
    fn test_parse_period_year_h1() {
        assert_eq!(
            parse_period("2024H1").unwrap(),
            Period::YearQuarter {
                year: 2024,
                quarter: 2
            }
        );
    }

    // A space-separated 9M window is reported as quarter 3.
    #[test]
    fn test_parse_period_year_9m() {
        assert_eq!(
            parse_period("2024 9M").unwrap(),
            Period::YearQuarter {
                year: 2024,
                quarter: 3
            }
        );
    }

    // Leading and trailing whitespace does not affect parsing.
    #[test]
    fn test_parse_period_whitespace_trimmed() {
        assert_eq!(
            parse_period("  2024Q2  ").unwrap(),
            Period::YearQuarter {
                year: 2024,
                quarter: 2
            }
        );
    }

    // A slot token without a year is an error mentioning the missing year.
    #[test]
    fn test_parse_period_missing_year() {
        let err = parse_period("Q3").unwrap_err();
        assert!(err.contains("missing year"));
    }

    // The empty string is reported as missing a year too.
    #[test]
    fn test_parse_period_empty_string() {
        let err = parse_period("").unwrap_err();
        assert!(err.contains("missing year"));
    }

    // ---- normalize_symbol (additional cases) -------------------------------

    // A dashed symbol yields both the dashed and the dotted spelling.
    #[test]
    fn test_normalize_symbol_dash_to_dot() {
        let c = normalize_symbol("BRK-B");
        assert!(c.contains(&"BRK.B".to_string()));
        assert!(c.contains(&"BRK-B".to_string()));
    }

    // NOTE(review): same expectation as `normalize_symbol_upcases_plain_ticker`,
    // only with already-upper-case input.
    #[test]
    fn test_normalize_symbol_no_change_needed() {
        let c = normalize_symbol("AAPL");
        assert_eq!(c, vec!["AAPL".to_string()]);
    }

    // NOTE(review): overlaps `normalize_symbol_generates_dot_and_dash_variants`;
    // the extra value here is the length check, which proves de-duplication.
    #[test]
    fn test_normalize_symbol_dot_and_dash_both_present() {
        let c = normalize_symbol("brk.b");
        assert!(c.contains(&"BRK.B".to_string()));
        assert!(c.contains(&"BRK-B".to_string()));
        assert_eq!(c.len(), 2);
    }

    // ---- normalize_fp_label (additional cases) -----------------------------

    // Extends `normalize_fp_label_maps_q4_to_fy` with a whitespace-padded Q4.
    #[test]
    fn test_normalize_fp_label_q4_case_insensitive() {
        assert_eq!(normalize_fp_label("Q4"), "FY");
        assert_eq!(normalize_fp_label("q4"), "FY");
        assert_eq!(normalize_fp_label(" Q4 "), "FY");
    }
}
435
/// A parsed reporting period, as returned by `parse_period`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Period {
    /// A year together with a quarter number. `parse_period` fills `quarter`
    /// from `PeriodSlot::normalized_quarter`, so it is a value from 1 to 4.
    YearQuarter { year: i64, quarter: i64 },
    /// A year for which no sub-period token was recognised.
    Year { year: i64 },
}
445
/// Finds the first plausible four-digit year in `s`.
///
/// Scans left to right over every run of four consecutive ASCII digits
/// (windows may overlap and may sit inside a longer digit run) and returns
/// the first one whose value lies in `1900..=2100`.
///
/// Returns `None` when `s` is shorter than four bytes or contains no such
/// window.
pub fn extract_first_year(s: &str) -> Option<i64> {
    // ASCII digits are single bytes and never occur inside a multi-byte UTF-8
    // sequence, so scanning bytes finds exactly the digit runs a char scan
    // would, without allocating a `Vec<char>`.
    s.as_bytes()
        .windows(4)
        .filter(|window| window.iter().all(u8::is_ascii_digit))
        // Four decimal digits always fit; fold them straight into an integer
        // instead of building and parsing a temporary `String`.
        .map(|window| {
            window
                .iter()
                .fold(0_i64, |acc, &digit| acc * 10 + i64::from(digit - b'0'))
        })
        .find(|year| (1900..=2100).contains(year))
}
468
469pub fn parse_period(period: &str) -> Result<Period, String> {
483 let raw = period.trim();
484 let upper = raw.to_ascii_uppercase();
485 let year = extract_first_year(&upper).ok_or_else(|| {
486 format!(
487 "Period `{}` is missing year; expected values like 2024Q3, 2024H1, or 2024FY",
488 raw
489 )
490 })?;
491 if let Some(slot) = parse_period_slot(&upper) {
492 let q = slot.normalized_quarter();
493 return Ok(Period::YearQuarter { year, quarter: q });
494 }
495 Ok(Period::Year { year })
496}
497
/// Fixed, ordered list of column names.
///
/// NOTE(review): judging by the identifier these are the metadata (non-metric)
/// columns of a US-GAAP CSV export; the consumer is not visible from this
/// part of the file, so confirm the exact role and whether order matters.
pub const US_GAAP_CSV_META_COLUMNS: &[&str] = &[
    "canonical_order",
    "fy",
    "fp",
    "period_end",
    "filed",
    "form",
    "is_amendment",
    "accn",
    "filing_url",
];