use std::{borrow::Cow, fmt::Display, str::FromStr};
use chrono::{NaiveDate, NaiveTime};
use regex::Regex;
use thiserror::Error;
use tl::{Node, ParserOptions, VDom, VDomGuard};
use crate::{ParseIdError, Semester};
// Number of group indices a single results page is treated as spanning; used by
// `ClassSchedule::group_iter` to turn a page number into a range of group indices.
const CLASSES_PER_PAGE: u32 = 50;
// Number of class slots probed inside every group (see `ClassGroup::class_iter`).
const CLASSES_PER_GROUP: u32 = 3;
// Expected text of a group's session element. Capture 1 is the one- or two-digit number.
const SESSION_FORMAT: &str = r"^University (\d\d?) Week Session$";
// The element-id templates below are macros rather than `const`s because `format!`
// requires a string literal as its first argument; a macro expanding to a literal
// satisfies that. `$` has no special meaning in these format strings, so every `$`
// is emitted literally; only `{}` is substituted.
//
// Id of a group's session element. Argument: group index.
macro_rules! SESSION_TAG {
() => {
"SSR_DER_CS_GRP_SESSION_CODE$215$${}"
};
}
// Expected text of a class-info element. Capture 1 is the class number, capture 2 the
// section (an uppercase letter followed by optional digits), capture 3 an uppercase
// code that `Class::class_type` parses into a `ClassType`.
const CLASS_ID_FORMAT: &str = r"^Class Nbr (\d+) - Section ([A-Z](?:\d?)+) ([A-Z]+)$";
// Middle id fragment for the class-info element, indexed by the zero-based class slot.
// NOTE(review): presumably these numbers are fixed by the scraped page's markup; verify.
const CLASS_ID_TAG_SEQ: [u32; 3] = [294, 295, 296];
// Id of a class-info element. Arguments: one-based class slot, `CLASS_ID_TAG_SEQ` entry,
// group index.
macro_rules! CLASS_ID_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_CMPNT_DESCR_{}${}$${}"
};
}
// chrono format for each date of a group's date range: month/day/four-digit year.
const DATES_TIME_FORMAT: &str = "%m/%d/%Y";
// Id of a group's date-range element. Argument: group index.
macro_rules! DATES_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_MTG_DT_LONG_1$88$${}"
};
}
// chrono format for a meeting time: unpadded 12-hour clock, minutes, then AM/PM.
const DATETIME_TIME_FORMAT: &str = "%-I:%M%p";
// Expected text of a meeting-schedule element. Capture 1 is one or more capitalised
// words, each followed by whitespace (the day names); capture 2 is the start time and
// capture 3 the end time.
const DATETIME_FORMAT: &str =
r"^((?:[A-Z][a-z]+\s)+)(\d?\d:\d\d(?:AM|PM)) to (\d?\d:\d\d(?:AM|PM))$";
// Middle id fragment for the meeting-schedule element, indexed by the zero-based class slot.
const DATETIME_TAG_SEQ: [u32; 3] = [134, 135, 154];
// Id of a meeting-schedule element. Arguments: one-based class slot, `DATETIME_TAG_SEQ`
// entry, group index.
macro_rules! DATETIME_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_MTG_SCHED_L_{}${}$${}"
};
}
// Id of a room element. Arguments: one-based class slot, group index.
macro_rules! ROOM_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_MTG_LOC_LONG_{}${}"
};
}
// Middle id fragment for the instructor element, indexed by the zero-based class slot.
const INSTRUCTOR_TAG_SEQ: [u32; 3] = [86, 161, 162];
// Id of an instructor element. Arguments: one-based class slot, `INSTRUCTOR_TAG_SEQ`
// entry, group index.
macro_rules! INSTRUCTOR_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_INSTR_LONG_{}${}$${}"
};
}
// Expected text of a seats element when it is not the literal `Closed`. Capture 1 is the
// open-seat count, capture 2 the total.
const SEATS_FORMAT: &str = r"^Open Seats (\d+) of (\d+)$";
// Id of a seats element. Arguments: one-based class slot, group index.
macro_rules! SEATS_TAG {
() => {
"SSR_CLSRCH_F_WK_SSR_DESCR50_{}${}"
};
}
/// Parsed HTML of one schedule results page together with that page's number.
///
/// Created with [`ClassSchedule::new`]; the groups and classes it hands out borrow from
/// the DOM stored here.
#[derive(Debug)]
pub struct ClassSchedule {
// Owned DOM returned by `tl::parse_owned`.
dom: VDomGuard,
// Page number supplied by the caller; decides which group indices are iterated.
page: u32,
}
impl ClassSchedule {
    /// Parses the raw bytes of a results page.
    ///
    /// `page` is stored as given and later used by [`ClassSchedule::group_iter`] to
    /// decide which group indices belong to this page.
    ///
    /// # Errors
    ///
    /// * [`ParseError::HtmlInvalidUtf8`] if `bytes` is not valid UTF-8.
    /// * [`ParseError::InvalidHtmlFormat`] if `tl` rejects the document.
    pub fn new(bytes: Vec<u8>, page: u32) -> Result<Self, ParseError> {
        let html = String::from_utf8(bytes)?;
        // SAFETY: `html` is moved into `parse_owned`, so nothing else can touch the
        // buffer, and the returned guard is stored in `self` and only ever borrowed
        // through `get_ref`.
        // NOTE(review): confirm this matches the safety contract documented for
        // `tl::parse_owned` in the `tl` version in use.
        let dom = unsafe { tl::parse_owned(html, ParserOptions::default()) }?;
        Ok(Self { dom, page })
    }

    /// Reads the semester from the element with id `TERM_VAL_TBL_DESCR`.
    ///
    /// # Errors
    ///
    /// Fails if the element is missing, its text is not a single plain text node, or
    /// the text does not parse as a [`Semester`].
    pub fn semester(&self) -> Result<Semester, ParseError> {
        let text = get_text_from_id_without_sub_nodes(self.dom.get_ref(), "TERM_VAL_TBL_DESCR")?;
        text.parse::<Semester>().map_err(Into::into)
    }

    /// Iterates over the `CLASSES_PER_PAGE` group slots that belong to this page.
    ///
    /// Page `p` (one-based) covers group indices
    /// `(p - 1) * CLASSES_PER_PAGE .. p * CLASSES_PER_PAGE`; page `0` yields nothing.
    /// Slots are yielded unconditionally — a slot the document has no data for only
    /// surfaces as an error when one of the [`ClassGroup`] accessors is called.
    pub fn group_iter(&self) -> impl Iterator<Item = ClassGroup<'_>> + '_ {
        let dom = self.dom.get_ref();
        // Saturating arithmetic keeps absurd page numbers from overflowing; they simply
        // produce an empty range.
        let first_group = self.page.saturating_sub(1).saturating_mul(CLASSES_PER_PAGE);
        // Exclusive upper bound, so the last group of the page is included.
        let end_group = self.page.saturating_mul(CLASSES_PER_PAGE);
        (first_group..end_group).map(move |group_num| ClassGroup { dom, group_num })
    }
}
/// Borrowed view of one group slot within a parsed schedule page.
///
/// Holds no parsed data itself; every accessor looks its element up in the DOM by an id
/// built from `group_num`.
#[derive(Debug, Clone)]
pub struct ClassGroup<'a> {
// DOM of the page this group belongs to.
dom: &'a VDom<'a>,
// Group index substituted into the element ids.
group_num: u32,
}
impl<'a> ClassGroup<'a> {
    /// Iterates over the `CLASSES_PER_GROUP` class slots of this group.
    ///
    /// Slots are yielded unconditionally; whether the document has data for a slot only
    /// shows up when one of the [`Class`] accessors is called.
    pub fn class_iter(&self) -> impl Iterator<Item = Class<'a>> + '_ {
        let dom = self.dom;
        let group_num = self.group_num;
        (0..CLASSES_PER_GROUP).map(move |class_num| Class {
            dom,
            class_num,
            group_num,
        })
    }

    /// Returns the number `N` from session text of the form `University N Week Session`.
    ///
    /// # Errors
    ///
    /// Fails if the session element is missing, is not a single plain text node, or does
    /// not match `SESSION_FORMAT`.
    pub fn session(&self) -> Result<u32, ParseError> {
        let text =
            get_text_from_id_without_sub_nodes(self.dom, &format!(SESSION_TAG!(), self.group_num))?;
        let captures = Regex::new(SESSION_FORMAT)
            .expect("SESSION_FORMAT is a valid regex")
            .captures(text)
            .ok_or(ParseError::UnknownElementFormat)?;
        let number = captures.get(1).ok_or(ParseError::UnknownElementFormat)?;
        number
            .as_str()
            .parse::<u32>()
            .map_err(|_| ParseError::UnknownElementFormat)
    }

    /// First date of the group's date range.
    ///
    /// # Errors
    ///
    /// Fails if the date-range element is missing, is not a single plain text node, or
    /// its dates cannot be parsed.
    pub fn start_date(&self) -> Result<NaiveDate, ParseError> {
        self.dates().map(|(start, _)| start)
    }

    /// Second date of the group's date range.
    ///
    /// # Errors
    ///
    /// Fails if the date-range element is missing, is not a single plain text node, or
    /// its dates cannot be parsed.
    pub fn end_date(&self) -> Result<NaiveDate, ParseError> {
        self.dates().map(|(_, end)| end)
    }

    /// Parses the `<date> - <date>` text of the group's date-range element.
    ///
    /// Both dates are read with `DATES_TIME_FORMAT`. Any missing or unparsable date is
    /// reported as [`ParseError::UnknownElementFormat`].
    fn dates(&self) -> Result<(NaiveDate, NaiveDate), ParseError> {
        let text =
            get_text_from_id_without_sub_nodes(self.dom, &format!(DATES_TAG!(), self.group_num))?;
        // The characters padding the hyphen depend on how the page text was decoded
        // (presumably a non-breaking space, which can also surface as `Â` plus a space).
        // The dates themselves contain only digits and `/`, so split on the hyphen and
        // strip whatever non-digit padding surrounds each piece instead of matching one
        // exact separator.
        let mut pieces = text
            .split('-')
            .map(|piece| piece.trim_matches(|c: char| !c.is_ascii_digit()));
        let mut next_date = || -> Result<NaiveDate, ParseError> {
            let raw = pieces.next().ok_or(ParseError::UnknownElementFormat)?;
            NaiveDate::parse_from_str(raw, DATES_TIME_FORMAT)
                .map_err(|_| ParseError::UnknownElementFormat)
        };
        let start = next_date()?;
        let end = next_date()?;
        Ok((start, end))
    }
}
/// Borrowed view of one class slot inside a group.
///
/// Holds no parsed data itself; every accessor looks its element up in the DOM by an id
/// built from `class_num` and `group_num`.
#[derive(Debug, Clone)]
pub struct Class<'a> {
// DOM of the page this class belongs to.
dom: &'a VDom<'a>,
// Zero-based slot within the group; ids use `class_num + 1`, and the `*_TAG_SEQ` tables
// are indexed with it directly.
class_num: u32,
// Index of the enclosing group.
group_num: u32,
}
impl Class<'_> {
pub fn is_open(&self) -> Result<bool, ParseError> {
let seats = get_text_from_id_without_sub_nodes(
self.dom,
&format!(SEATS_TAG!(), self.class_num + 1, self.group_num),
)?;
if seats == "Closed" {
return Ok(false);
}
Ok(true)
}
pub fn class_type(&self) -> Result<ClassType, ParseError> {
self.class_info()
.map(|info| info.2.parse().map_err(|_| ParseError::UnknownElementFormat))?
}
pub fn class_id(&self) -> Result<u32, ParseError> {
self.class_info()
.map(|info| info.0.parse().map_err(|_| ParseError::UnknownElementFormat))?
}
pub fn section(&self) -> Result<&str, ParseError> {
self.class_info().map(|info| info.1)
}
pub fn days_of_week(&self) -> Result<Option<Vec<Result<DayOfWeek, ParseError>>>, ParseError> {
self.datetime().map(|result| {
result.map(|datetime| {
datetime
.0
.iter()
.map(|days| days.parse().map_err(|_| ParseError::UnknownElementFormat))
.collect()
})
})
}
pub fn start_time(&self) -> Result<Option<NaiveTime>, ParseError> {
self.datetime()
.map(|result| {
result.map(|datetime| {
NaiveTime::parse_from_str(&datetime.1, DATETIME_TIME_FORMAT)
.map_err(|_| ParseError::UnknownElementFormat)
})
})?
.transpose()
}
pub fn end_time(&self) -> Result<Option<NaiveTime>, ParseError> {
self.datetime()
.map(|result| {
result.map(|datetime| {
NaiveTime::parse_from_str(&datetime.2, DATETIME_TIME_FORMAT)
.map_err(|_| ParseError::UnknownElementFormat)
})
})?
.transpose()
}
pub fn room(&self) -> Result<&str, ParseError> {
get_text_from_id_without_sub_nodes(
self.dom,
&format!(ROOM_TAG!(), self.class_num + 1, self.group_num),
)
}
pub fn instructor(&self) -> Result<&str, ParseError> {
get_text_from_id_without_sub_nodes(
self.dom,
&format!(
INSTRUCTOR_TAG!(),
self.class_num + 1,
INSTRUCTOR_TAG_SEQ[self.class_num as usize],
self.group_num
),
)
}
pub fn open_seats(&self) -> Result<Option<u32>, ParseError> {
self.seats().map(|seats| seats.map(|seats| seats.0))
}
pub fn total_seats(&self) -> Result<Option<u32>, ParseError> {
self.seats().map(|seats| seats.map(|seats| seats.1))
}
fn class_info(&self) -> Result<(&str, &str, &str), ParseError> {
let class_info = get_text_from_id_without_sub_nodes(
self.dom,
&format!(
CLASS_ID_TAG!(),
self.class_num + 1,
CLASS_ID_TAG_SEQ[self.class_num as usize],
self.group_num
),
)?;
let re = Regex::new(CLASS_ID_FORMAT)
.unwrap()
.captures(class_info)
.ok_or(ParseError::UnknownElementFormat)?;
Ok((
re.get(1).ok_or(ParseError::UnknownElementFormat)?.as_str(),
re.get(2).ok_or(ParseError::UnknownElementFormat)?.as_str(),
re.get(3).ok_or(ParseError::UnknownElementFormat)?.as_str(),
))
}
fn datetime(&self) -> Result<Option<(Vec<String>, String, String)>, ParseError> {
get_node_from_id(
self.dom,
&format!(
DATETIME_TAG!(),
self.class_num + 1,
DATETIME_TAG_SEQ[self.class_num as usize],
self.group_num
),
)
.map_or_else(
|err| match err {
ParseError::MissingTag => Ok(None),
_ => Err(err),
},
|node| {
match node.inner_text(self.dom.parser()) {
Cow::Borrowed(_) => Err(ParseError::UnknownHtmlFormat),
Cow::Owned(value) => {
let re = Regex::new(DATETIME_FORMAT)
.unwrap()
.captures(&value)
.ok_or(ParseError::UnknownElementFormat)?;
Ok(Some((
re.get(1)
.ok_or(ParseError::UnknownElementFormat)?
.as_str()
.split_whitespace()
.map(|string| string.to_owned())
.collect(), re.get(2)
.ok_or(ParseError::UnknownElementFormat)?
.as_str()
.to_owned(), re.get(3)
.ok_or(ParseError::UnknownElementFormat)?
.as_str()
.to_owned(), )))
}
}
},
)
}
fn seats(&self) -> Result<Option<(u32, u32)>, ParseError> {
let seats = get_text_from_id_without_sub_nodes(
self.dom,
&format!(SEATS_TAG!(), self.class_num + 1, self.group_num),
)?;
match seats {
"Closed" => Ok(None),
_ => {
let re = Regex::new(SEATS_FORMAT)
.unwrap()
.captures(seats)
.ok_or(ParseError::UnknownElementFormat)?;
Ok(Some((
re.get(1)
.ok_or(ParseError::UnknownElementFormat)?
.as_str()
.parse()
.map_err(|_| ParseError::UnknownElementFormat)?, re.get(2)
.ok_or(ParseError::UnknownHtmlFormat)?
.as_str()
.parse()
.map_err(|_| ParseError::UnknownElementFormat)?, )))
}
}
}
}
/// Kind of class, as identified by the three-letter code in the class-info text.
///
/// Comparable and hashable so callers can filter, deduplicate, or key collections by it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ClassType {
    /// Parsed from `REC`.
    Recitation,
    /// Parsed from `LAB`.
    Lab,
    /// Parsed from `LEC`.
    Lecture,
    /// Parsed from `SEM`.
    Seminar,
}
impl FromStr for ClassType {
    type Err = ParseError;

    /// Maps an exact three-letter code to its variant.
    ///
    /// Any other input, including different casing, is rejected with
    /// [`ParseError::UnknownElementFormat`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const CODES: [(&str, ClassType); 4] = [
            ("REC", ClassType::Recitation),
            ("LAB", ClassType::Lab),
            ("LEC", ClassType::Lecture),
            ("SEM", ClassType::Seminar),
        ];
        CODES
            .iter()
            .find(|entry| entry.0 == s)
            .map(|entry| entry.1)
            .ok_or(ParseError::UnknownElementFormat)
    }
}
impl Display for ClassType {
    /// Writes the full English name of the class type.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            ClassType::Recitation => "Recitation",
            ClassType::Lab => "Lab",
            ClassType::Lecture => "Lecture",
            ClassType::Seminar => "Seminar",
        };
        f.write_str(label)
    }
}
/// Day of the week as spelled out in the meeting-schedule text.
///
/// Comparable and hashable so callers can filter, deduplicate, or key collections by it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DayOfWeek {
    Sunday,
    Monday,
    Tuesday,
    Wednesday,
    Thursday,
    Friday,
    Saturday,
}
impl FromStr for DayOfWeek {
    type Err = ParseError;

    /// Maps an exact, capitalised English day name to its variant.
    ///
    /// Any other input, including abbreviations or different casing, is rejected with
    /// [`ParseError::UnknownElementFormat`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const NAMES: [(&str, DayOfWeek); 7] = [
            ("Sunday", DayOfWeek::Sunday),
            ("Monday", DayOfWeek::Monday),
            ("Tuesday", DayOfWeek::Tuesday),
            ("Wednesday", DayOfWeek::Wednesday),
            ("Thursday", DayOfWeek::Thursday),
            ("Friday", DayOfWeek::Friday),
            ("Saturday", DayOfWeek::Saturday),
        ];
        NAMES
            .iter()
            .find(|entry| entry.0 == s)
            .map(|entry| entry.1)
            .ok_or(ParseError::UnknownElementFormat)
    }
}
impl Display for DayOfWeek {
    /// Writes the full English name of the day.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            DayOfWeek::Sunday => "Sunday",
            DayOfWeek::Monday => "Monday",
            DayOfWeek::Tuesday => "Tuesday",
            DayOfWeek::Wednesday => "Wednesday",
            DayOfWeek::Thursday => "Thursday",
            DayOfWeek::Friday => "Friday",
            DayOfWeek::Saturday => "Saturday",
        };
        f.write_str(name)
    }
}
/// Returns the text of the element with the given `id`, borrowed from the document.
///
/// Only text that `tl` can hand back as a borrowed slice is accepted; if the text had to
/// be assembled into an owned string, the element is reported as
/// [`ParseError::UnknownHtmlFormat`]. A missing element is reported by the underlying
/// lookup.
fn get_text_from_id_without_sub_nodes<'a>(dom: &'a VDom, id: &str) -> Result<&'a str, ParseError> {
    let node = get_node_from_id(dom, id)?;
    if let Cow::Borrowed(text) = node.inner_text(dom.parser()) {
        Ok(text)
    } else {
        Err(ParseError::UnknownHtmlFormat)
    }
}
/// Looks up the node of the element with the given `id`.
///
/// # Errors
///
/// Returns [`ParseError::MissingTag`] when no element carries `id`, and also in the
/// unexpected case that the handle returned for it does not resolve to a node (this
/// used to panic).
fn get_node_from_id<'a>(dom: &'a VDom, id: &str) -> Result<&'a Node<'a>, ParseError> {
    let handle = dom.get_element_by_id(id).ok_or(ParseError::MissingTag)?;
    handle.get(dom.parser()).ok_or(ParseError::MissingTag)
}
/// Errors produced while parsing a schedule page or reading values out of it.
#[derive(Debug, Error)]
pub enum ParseError {
/// An id could not be parsed; wraps the crate's `ParseIdError` and displays its message
/// unchanged.
#[error(transparent)]
UnknownIdFormat(#[from] ParseIdError),
/// The page bytes were not valid UTF-8 (raised by `ClassSchedule::new`).
#[error("could not parse HTML due to invalid Utf-8 encoding")]
HtmlInvalidUtf8(#[from] std::string::FromUtf8Error),
/// `tl` failed to parse the document (raised by `ClassSchedule::new`).
#[error("could not parse HTML due to invalid format")]
InvalidHtmlFormat(#[from] tl::errors::ParseError),
/// NOTE(review): not constructed anywhere in the code visible in this file; confirm
/// whether other modules still use it.
#[error("could not find tags in HTML")]
EmptyHtml,
/// An element was found, but its text was not in the borrowed/owned form the accessor
/// expects.
#[error("format of HTML could not be parsed because it is unknown")]
UnknownHtmlFormat,
/// An element's text did not match the expected pattern, or a value inside it could not
/// be parsed.
#[error("format of element could not be parsed because it is unknown")]
UnknownElementFormat,
/// No element with the requested id exists in the document.
#[error("could not find tag for class in HTML")]
MissingTag,
}