use chrono::{Datelike, Local, NaiveDate, NaiveDateTime, NaiveTime, TimeZone};
use scraper::{Html, Selector};
use tracing::debug;
use crate::error::{Error, Result};
use crate::types::{EconomicEvent, Impact};
/// HTML parser for an economic-calendar table.
///
/// Holds one pre-compiled CSS [`Selector`] per piece of data that is read
/// from an event row, so the selector strings are parsed only once (in
/// [`CalendarParser::new`]) rather than on every call to `parse`.
pub struct CalendarParser {
/// Matches each event row (`tr[data-event-id]`).
row_selector: Selector,
/// Matches the date cell of a row (`td.calendar__date`).
date_selector: Selector,
/// Matches the currency cell of a row (`td.calendar__currency`).
currency_selector: Selector,
/// Matches the impact marker (`td.calendar__impact span`); its `class`
/// attribute is what gets mapped to an `Impact`.
impact_selector: Selector,
/// Matches the event title (`td.calendar__event span.calendar__event-title`).
event_selector: Selector,
/// Matches the time cell of a row (`td.calendar__time`).
time_selector: Selector,
/// Matches the "actual" value cell (`td.calendar__actual`).
actual_selector: Selector,
/// Matches the "forecast" value cell (`td.calendar__forecast`).
forecast_selector: Selector,
/// Matches the "previous" value cell (`td.calendar__previous`).
previous_selector: Selector,
}
impl CalendarParser {
pub fn new() -> Result<Self> {
Ok(Self {
row_selector: Selector::parse("tr[data-event-id]")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
date_selector: Selector::parse("td.calendar__date")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
currency_selector: Selector::parse("td.calendar__currency")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
impact_selector: Selector::parse("td.calendar__impact span")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
event_selector: Selector::parse("td.calendar__event span.calendar__event-title")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
time_selector: Selector::parse("td.calendar__time")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
actual_selector: Selector::parse("td.calendar__actual")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
forecast_selector: Selector::parse("td.calendar__forecast")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
previous_selector: Selector::parse("td.calendar__previous")
.map_err(|e| Error::InvalidSelector(format!("{e:?}")))?,
})
}
pub fn parse(&self, html: &str, base_date: NaiveDate) -> Vec<EconomicEvent> {
debug!("Parsing HTML of {} bytes for date {base_date}", html.len());
let document = Html::parse_document(html);
let mut events = Vec::new();
let mut current_date = base_date;
let mut current_time: Option<NaiveTime> = None;
let reference_year = base_date.year();
let row_count = document.select(&self.row_selector).count();
debug!("Found {row_count} event rows in HTML");
for row in document.select(&self.row_selector) {
let event = self.parse_row(&row, &mut current_date, &mut current_time, reference_year);
match event {
Some(e) => {
debug!("Parsed event: {} ({}) - {}", e.name, e.currency, e.impact);
events.push(e);
}
None => {
continue;
}
}
}
events
}
fn parse_row(
&self,
row: &scraper::ElementRef,
current_date: &mut NaiveDate,
current_time: &mut Option<NaiveTime>,
reference_year: i32,
) -> Option<EconomicEvent> {
let date_text = self.extract_text(row, &self.date_selector);
if let Some(parsed_date) = parse_date(&date_text, reference_year) {
debug!("Parsed date from row: {parsed_date}");
*current_date = parsed_date;
*current_time = None;
}
let currency = self.extract_text(row, &self.currency_selector);
if currency.is_empty() {
return None;
}
let impact = self.extract_impact(row).unwrap_or(Impact::Low);
let name = self.extract_text(row, &self.event_selector);
if name.is_empty() {
return None;
}
let time_text = self.extract_text(row, &self.time_selector);
if !time_text.is_empty()
&& time_text != "All Day"
&& time_text != "Tentative"
&& let Some(parsed_time) = parse_time(&time_text)
{
*current_time = Some(parsed_time);
}
let time = current_time.unwrap_or(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
let datetime = NaiveDateTime::new(*current_date, time);
let datetime_local = Local
.from_local_datetime(&datetime)
.single()
.unwrap_or_else(|| Local.from_utc_datetime(&datetime));
let actual = self.extract_text(row, &self.actual_selector);
let forecast = self.extract_text(row, &self.forecast_selector);
let previous = self.extract_text(row, &self.previous_selector);
Some(EconomicEvent {
name,
currency,
impact,
datetime: datetime_local,
actual: if actual.is_empty() {
None
} else {
Some(actual)
},
forecast: if forecast.is_empty() {
None
} else {
Some(forecast)
},
previous: if previous.is_empty() {
None
} else {
Some(previous)
},
})
}
fn extract_text(&self, row: &scraper::ElementRef, selector: &Selector) -> String {
row.select(selector)
.next()
.map(|el| el.text().collect::<String>().trim().to_string())
.unwrap_or_default()
}
fn extract_impact(&self, row: &scraper::ElementRef) -> Option<Impact> {
row.select(&self.impact_selector)
.next()
.and_then(|el| el.value().attr("class").and_then(Impact::from_ff_class))
}
}
/// Parses a calendar date cell into a [`NaiveDate`] within `reference_year`.
///
/// Two whitespace-separated layouts are accepted:
///
/// * three tokens, e.g. `"Tue Jan 13"` — the first token is ignored;
/// * two tokens where the first is at least six bytes long, e.g.
///   `"TueJan 13"` — the first three bytes are dropped and the rest is taken
///   as the month.
///
/// The month must be a three-letter English abbreviation (ASCII
/// case-insensitive). Returns `None` for empty input, any other layout, an
/// unknown month, a non-numeric day, or a day that does not exist in that
/// month of `reference_year`.
fn parse_date(date_str: &str, reference_year: i32) -> Option<NaiveDate> {
    const MONTHS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];

    let parts: Vec<&str> = date_str.split_whitespace().collect();
    let (month_str, day_str) = match parts.as_slice() {
        [_, month, day] => (*month, *day),
        // `get` rather than `[3..]`: the text comes from scraped HTML, and a
        // byte-index slice would panic if byte 3 fell inside a multi-byte
        // character.
        [fused, day] if fused.len() >= 6 => (fused.get(3..)?, *day),
        _ => return None,
    };

    let month = MONTHS
        .iter()
        .zip(1u32..)
        .find(|(abbrev, _)| abbrev.eq_ignore_ascii_case(month_str))
        .map(|(_, number)| number)?;
    let day: u32 = day_str.parse().ok()?;

    NaiveDate::from_ymd_opt(reference_year, month, day)
}
/// Parses a time-of-day cell into a [`NaiveTime`].
///
/// The input is trimmed and lower-cased, then matched against each format in
/// turn: 12-hour with an am/pm suffix (`%l:%M%P`, then `%I:%M%P`), and finally
/// 24-hour `%H:%M`. The first format that parses wins; `None` is returned when
/// none of them do.
fn parse_time(time_str: &str) -> Option<NaiveTime> {
    // Tried in order; earlier entries take precedence.
    const FORMATS: [&str; 3] = ["%l:%M%P", "%I:%M%P", "%H:%M"];

    let normalized = time_str.trim().to_lowercase();
    FORMATS
        .iter()
        .find_map(|format| NaiveTime::parse_from_str(&normalized, format).ok())
}
impl Default for CalendarParser {
    /// Equivalent to [`CalendarParser::new`], for contexts that need `Default`.
    ///
    /// # Panics
    ///
    /// Panics if `new` returns an error, i.e. if one of the built-in selector
    /// strings fails to compile.
    fn default() -> Self {
        let built = Self::new();
        built.expect("Default selectors should be valid")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A 24-hour `HH:MM` string parses to the matching time of day.
    #[test]
    fn test_parse_time() {
        let two_pm = NaiveTime::from_hms_opt(14, 0, 0).unwrap();
        assert_eq!(parse_time("14:00"), Some(two_pm));
    }

    /// The built-in selectors all compile.
    #[test]
    fn test_parser_creation() {
        assert!(CalendarParser::new().is_ok());
    }

    /// Three-token dates resolve against the reference year; blank input
    /// yields `None`.
    #[test]
    fn test_parse_date() {
        let cases = [
            ("Tue Jan 13", Some((2026, 1, 13))),
            ("Mon Feb 3", Some((2026, 2, 3))),
            ("", None),
            (" ", None),
        ];

        for (input, ymd) in cases {
            let expected = ymd.map(|(y, m, d)| NaiveDate::from_ymd_opt(y, m, d).unwrap());
            assert_eq!(parse_date(input, 2026), expected);
        }
    }
}