use serde::{Deserialize, Serialize};
/// An institution (school) record.
///
/// With `rename_all = "camelCase"` the fields are (de)serialized under
/// camelCase keys, e.g. `rovid_nev` <-> `rovidNev`.
///
/// Within this file, instances are produced by `School::parse_schools_from_html`,
/// which fills only `azonosito` and `nev`; every other field is left at its
/// `Default` value (empty string / `0` / `None`).
///
/// NOTE(review): field names are Hungarian. The glosses on the fields below are
/// literal translations of the names only; the exact semantics of each value
/// cannot be confirmed from this file.
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct School {
/// Record id.
pub id: String,
/// "azonosító" = identifier. Set from the `data-val` attribute by the HTML parser.
pub azonosito: String,
/// "név" = name. Set from the entity-decoded link text by the HTML parser.
pub nev: String,
/// "rövid név" = short name; optional.
pub rovid_nev: Option<String>,
/// "OM kód" = OM code (presumably the official institution code; verify).
pub om_kod: String,
/// Link to the institution's KRÉTA instance (presumably a URL; verify).
pub kreta_link: String,
/// "település" = settlement / town.
pub telepules: String,
/// "aktív tanév" = active school year: numeric id.
pub aktiv_tanev_id: i64,
/// Active school year: GUID, kept as a string.
pub aktiv_tanev_guid: String,
/// Active school year: name.
pub aktiv_tanev_nev: String,
/// "környezet" = environment: numeric id.
pub kornyezet_id: i64,
/// Environment: name.
pub kornyezet_nev: String,
/// Environment: full name ("teljes név").
pub kornyezet_teljes_nev: String,
/// "fenntartó" = maintainer / operator: identifier.
pub fenntarto_azonosito: String,
/// Maintainer / operator: name.
pub fenntarto_nev: String,
}
impl School {
    /// Sends a GET request to the public institution list endpoint on
    /// `kretaglobalapi.e-kreta.hu` and returns the raw HTTP response.
    ///
    /// The response body is neither read nor parsed here.
    ///
    /// # Errors
    /// Propagates any error raised while performing the request.
    #[deprecated = "seems like this endpoint is dead :("]
    pub fn fetch_schools_resp() -> crate::Res<crate::http::Response<ureq::Body>> {
        let uri = "https://kretaglobalapi.e-kreta.hu/intezmenyek/kreta/publikus";
        let agent = crate::account::agent_config().build().new_agent();
        let resp = agent.get(uri).call()?;
        Ok(resp)
    }

    /// Fetches the institutions matching the search term `q` and parses them.
    ///
    /// The returned [`School`] values only have `azonosito` and `nev` populated;
    /// see [`School::parse_schools_from_html`].
    ///
    /// # Errors
    /// Fails if the request fails, if the body cannot be read as a string, or
    /// if the returned HTML cannot be parsed.
    pub fn fetch_schools(q: &str) -> crate::Res<Vec<Self>> {
        let resp = School::fetch_schools_matching_resp(q)?;
        log::info!("received filtered raw school html from ekreta api");
        let raw_html = &resp.into_body().read_to_string()?;
        log::trace!("raw html of schools matching {q:?}: {raw_html}");
        let schools = Self::parse_schools_from_html(raw_html)?;
        Ok(schools)
    }

    /// Sends the institute-selector search request for `q` (live institutions
    /// only) and returns the raw HTTP response.
    ///
    /// `q` is inserted into the URL path verbatim.
    /// NOTE(review): no percent-encoding is applied here; confirm whether callers
    /// are expected to pass an already URL-safe search term.
    ///
    /// # Errors
    /// Propagates any error raised while performing the request.
    pub fn fetch_schools_matching_resp(q: &str) -> crate::Res<crate::http::Response<ureq::Body>> {
        let uri =
            format!("https://intezmenykereso.e-kreta.hu/instituteSelector/{q}?showOnlyLive=true");
        let agent = crate::account::agent_config().build().new_agent();
        let resp = agent.get(uri).call()?;
        Ok(resp)
    }

    /// Extracts schools from the institute-selector HTML, one school per line.
    ///
    /// For every non-empty (trimmed) line:
    /// * lines without a `data-val="` attribute are logged and skipped;
    /// * the attribute value becomes `azonosito`;
    /// * the text starting two bytes after the attribute's closing quote and
    ///   running up to the first `</a>` is entity-decoded and becomes `nev`.
    ///
    /// All remaining fields keep their `Default` values.
    ///
    /// # Errors
    /// Returns `"no id of school"` when the attribute value is unterminated and
    /// `"no name of school"` when the name cannot be located (including when the
    /// line ends right after the attribute). Errors from
    /// `decode_html_entities` are propagated. Any such error aborts the whole
    /// parse.
    fn parse_schools_from_html(raw_html: &str) -> crate::Res<Vec<School>> {
        const ID_TXT: &str = "data-val=\"";
        const ID_ERR: &str = "no id of school";
        const NAME_ERR: &str = "no name of school";
        const NAME_END_TXT: &str = "</a>";

        // Rough pre-allocation: assumes roughly 200 bytes of markup per school.
        let mut schools = Vec::with_capacity(raw_html.len() / 200);
        for line in raw_html.lines().map(|l| l.trim()).filter(|l| !l.is_empty()) {
            log::trace!("\n\nline: {line:?}");
            let Some(id_start) = line.find(ID_TXT).map(|s| s + ID_TXT.len()) else {
                log::warn!("couldn't get school-id from {line:?}, didn't find {ID_TXT:?}, skipped");
                continue;
            };
            let id_len = line[id_start..].find('"').ok_or(ID_ERR)?;
            let id = &line[id_start..id_start + id_len];
            log::debug!("id: {id:?}");

            // Skip the closing quote plus one more byte (presumably the `>` that
            // ends the opening tag). The offset may fall past the end of a
            // truncated line or inside a multi-byte character, so slice with
            // `get` and report an error instead of panicking.
            let name_start = id_start + id_len + 2;
            let after_id = line.get(name_start..).ok_or(NAME_ERR)?;
            let name_len = after_id.find(NAME_END_TXT).ok_or(NAME_ERR)?;
            let raw_name = &after_id[..name_len];
            log::debug!("raw name: {raw_name:?}");
            let decoded_name = decode_html_entities(raw_name)?;
            log::debug!("{decoded_name}");
            schools.push(Self {
                azonosito: id.to_string(),
                nev: decoded_name,
                ..Default::default()
            });
        }
        Ok(schools)
    }
}
pub fn decode_html_entities(raw_html: &str) -> crate::Res<String> {
let bytes = raw_html.as_bytes();
let mut decoded = String::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'&' && i + 3 < bytes.len() && bytes[i + 1] == b'#' && bytes[i + 2] == b'x' {
let start = i + 3;
let mut j = start;
while j < bytes.len() && bytes[j].is_ascii_hexdigit() {
j += 1;
}
if j < bytes.len() && bytes[j] == b';' && j > start {
let val = u32::from_str_radix(&raw_html[start..j], 16)?;
decoded.push(char::from_u32(val).ok_or("conversion err")?);
i = j + 1;
continue;
}
}
if bytes[i] == b'&' && i + 2 < bytes.len() && bytes[i + 1] == b'#' && bytes[i + 2] != b'x' {
let start = i + 2;
let mut j = start;
while j < bytes.len() && bytes[j].is_ascii_digit() {
j += 1;
}
if j < bytes.len() && bytes[j] == b';' && j > start {
let val: u32 = raw_html[start..j].parse()?;
decoded.push(char::from_u32(val).ok_or("conversion err")?);
i = j + 1;
continue;
}
}
let c = raw_html[i..].chars().next().ok_or("no next char")?;
decoded.push(c);
i += c.len_utf8();
}
decoded = decoded
.replace(""", "\"")
.replace("&", "&")
.replace(" ", " ");
Ok(decoded)
}
/// Parses the bundled HTML fixture and checks that the decoded school names,
/// written one per line, match the bundled expected rendering.
#[test]
fn schools_with_e_correctly_decoded() {
    let raw_html = include_str!("../../assets/bab_school.html");
    let exp_render = include_str!("../../assets/bab_schools_rendered.txt");

    let schools = School::parse_schools_from_html(raw_html).unwrap();

    // One name per line, each terminated by a newline.
    let mut schools_render = String::new();
    for school in &schools {
        schools_render.push_str(&school.nev);
        schools_render.push('\n');
    }

    assert_eq!(exp_render, schools_render);
}