//! School records and institute lookup against the e-Kreta institute search endpoint.
use serde::{Deserialize, Serialize};

/// A school (institute) record.
///
/// Field names are Hungarian and are (de)serialized with camelCase keys
/// (e.g. `rovid_nev` <-> `rovidNev`) because of `rename_all = "camelCase"`.
///
/// The HTML search-result parser in this file fills in only `azonosito` and
/// `nev`; every other field is left at its `Default` value in that case.
/// NOTE(review): the English glosses below are translations of the field
/// names, not verified against the upstream API schema.
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct School {
    /// Record id.
    pub id: String,
    /// Institute identifier ("azonosító"); the HTML parser stores the
    /// `data-val` attribute value here.
    pub azonosito: String,
    /// Name ("név"); the HTML parser stores the entity-decoded link text here.
    pub nev: String,
    /// Short name ("rövid név"), if any.
    pub rovid_nev: Option<String>,
    /// OM code ("OM kód") — presumably the official institute code; TODO confirm.
    pub om_kod: String,
    /// Link to the school's Kreta instance — presumably a URL; TODO confirm.
    pub kreta_link: String,
    /// Settlement / town ("település").
    pub telepules: String,
    /// Numeric id of the active school year ("aktív tanév").
    pub aktiv_tanev_id: i64,
    /// GUID of the active school year.
    pub aktiv_tanev_guid: String,
    /// Name of the active school year.
    pub aktiv_tanev_nev: String,
    /// Numeric id of the environment ("környezet").
    pub kornyezet_id: i64,
    /// Name of the environment.
    pub kornyezet_nev: String,
    /// Full name of the environment ("teljes név").
    pub kornyezet_teljes_nev: String,
    /// Identifier of the maintainer ("fenntartó").
    pub fenntarto_azonosito: String,
    /// Name of the maintainer.
    pub fenntarto_nev: String,
}
impl School {
    /// Requests the public school list and returns the raw HTTP response.
    ///
    /// # Errors
    /// Propagates any error produced while performing the request.
    #[deprecated = "seems like this endpoint is dead :("]
    pub fn fetch_schools_resp() -> crate::Res<crate::http::Response<ureq::Body>> {
        let uri = "https://kretaglobalapi.e-kreta.hu/intezmenyek/kreta/publikus";
        let agent = crate::account::agent_config().build().new_agent();
        let resp = agent.get(uri).call()?;
        Ok(resp)
    }

    /// Fetches the schools matching the query `q` and parses them into [`School`] values.
    ///
    /// Only `azonosito` and `nev` are populated on the returned records; see
    /// `parse_schools_from_html`.
    ///
    /// # Errors
    /// Fails if the request fails, the body cannot be read as a string, or the
    /// returned HTML cannot be parsed.
    pub fn fetch_schools(q: &str) -> crate::Res<Vec<Self>> {
        let resp = Self::fetch_schools_matching_resp(q)?;
        log::info!("received filtered raw school html from ekreta api");
        let raw_html = resp.into_body().read_to_string()?;
        log::trace!("raw html of schools matching {q:?}: {raw_html}");
        let schools = Self::parse_schools_from_html(&raw_html)?;
        Ok(schools)
    }

    /// Performs the institute-search request for `q` and returns the raw HTTP response.
    ///
    /// `q` is interpolated into the URL path verbatim: no percent-encoding is
    /// applied here, so callers are responsible for passing a URL-safe query.
    ///
    /// # Errors
    /// Propagates any error produced while performing the request.
    pub fn fetch_schools_matching_resp(q: &str) -> crate::Res<crate::http::Response<ureq::Body>> {
        let uri =
            format!("https://intezmenykereso.e-kreta.hu/instituteSelector/{q}?showOnlyLive=true");
        let agent = crate::account::agent_config().build().new_agent();
        let resp = agent.get(uri).call()?;
        Ok(resp)
    }

    /// Extracts schools from the line-oriented HTML of the institute search.
    ///
    /// Each non-empty (trimmed) line is expected to contain `data-val="<id>"`,
    /// followed two bytes later by the entity-encoded name, terminated by `</a>`.
    /// Lines without `data-val="` are logged and skipped.
    ///
    /// # Errors
    /// Returns an error if a line has an unterminated id attribute, if the name
    /// or its closing `</a>` is missing, or if the name cannot be entity-decoded.
    fn parse_schools_from_html(raw_html: &str) -> crate::Res<Vec<School>> {
        const ID_TXT: &str = "data-val=\"";
        const ID_ERR: &str = "no id of school";
        const NAME_ERR: &str = "no name of school";
        const NAME_END_TXT: &str = "</a>";

        // Rough pre-sizing heuristic: one school per ~200 bytes of markup.
        let mut schools = Vec::with_capacity(raw_html.len() / 200);
        for line in raw_html.lines().map(str::trim).filter(|l| !l.is_empty()) {
            log::trace!("\n\nline: {line:?}");
            let Some(id_start) = line.find(ID_TXT).map(|s| s + ID_TXT.len()) else {
                log::warn!("couldn't get school-id from {line:?}, didn't find {ID_TXT:?}, skipped");
                continue;
            };
            let id_len = line[id_start..].find('"').ok_or(ID_ERR)?;
            let id = &line[id_start..id_start + id_len];
            log::debug!("id: {id:?}");

            // Skip the closing quote plus the byte after it (presumably the `>` of the tag).
            let name_start = id_start + id_len + 2;
            // `get` instead of indexing: a truncated line, or a multi-byte character
            // right after the quote, must yield an error rather than a panic.
            let after_id = line.get(name_start..).ok_or(NAME_ERR)?;
            let name_len = after_id.find(NAME_END_TXT).ok_or(NAME_ERR)?;
            let raw_name = &after_id[..name_len];
            log::debug!("raw name: {raw_name:?}");

            let decoded_name = decode_html_entities(raw_name)?;
            log::debug!("{decoded_name}");
            schools.push(Self {
                azonosito: id.to_string(),
                nev: decoded_name,
                ..Default::default()
            });
        }
        Ok(schools)
    }
}

pub fn decode_html_entities(raw_html: &str) -> crate::Res<String> {
    let bytes = raw_html.as_bytes();
    let mut decoded = String::with_capacity(bytes.len());
    let mut i = 0;

    while i < bytes.len() {
        // try to match "&#x" (hex)
        if bytes[i] == b'&' && i + 3 < bytes.len() && bytes[i + 1] == b'#' && bytes[i + 2] == b'x' {
            let start = i + 3;
            let mut j = start;
            while j < bytes.len() && bytes[j].is_ascii_hexdigit() {
                j += 1;
            }
            if j < bytes.len() && bytes[j] == b';' && j > start {
                let val = u32::from_str_radix(&raw_html[start..j], 16)?;
                decoded.push(char::from_u32(val).ok_or("conversion err")?);
                i = j + 1;
                continue;
            }
        }

        // try to match "&#" (decimal)
        if bytes[i] == b'&' && i + 2 < bytes.len() && bytes[i + 1] == b'#' && bytes[i + 2] != b'x' {
            let start = i + 2;
            let mut j = start;
            while j < bytes.len() && bytes[j].is_ascii_digit() {
                j += 1;
            }
            if j < bytes.len() && bytes[j] == b';' && j > start {
                let val: u32 = raw_html[start..j].parse()?;
                decoded.push(char::from_u32(val).ok_or("conversion err")?);
                i = j + 1;
                continue;
            }
        }

        let c = raw_html[i..].chars().next().ok_or("no next char")?;
        decoded.push(c);
        i += c.len_utf8();
    }

    decoded = decoded
        .replace("&quot;", "\"")
        .replace("&amp;", "&")
        .replace("  ", " ");

    Ok(decoded)
}

/// Parses the bundled search-result fixture and checks that the decoded
/// school names, one per line, match the expected rendered text.
#[test]
fn schools_with_e_correctly_decoded() {
    let raw_html = include_str!("../../assets/bab_school.html");
    let exp_render = include_str!("../../assets/bab_schools_rendered.txt");

    let schools = School::parse_schools_from_html(raw_html).unwrap();

    // Render every school name followed by a newline.
    let mut schools_render = String::new();
    for school in &schools {
        schools_render.push_str(&school.nev);
        schools_render.push('\n');
    }

    assert_eq!(exp_render, schools_render);
}