//! lr2-oxytabler 0.3.0
//!
//! Table manager for Lunatic Rave 2.
use anyhow::{Context, Result};

/// Identifies a table to (re)fetch: the source URL plus locally-stored
/// metadata that must survive a refresh.
#[derive(Debug, Clone)]
pub struct Request {
    // Resolved URL of the table's web page (may already be a header URL).
    web_url: crate::ResolvedUrl,
    // Existing playlist association to carry over after the refresh, if any.
    playlist_id: Option<crate::PlaylistId>,
    // User-overridden symbol to carry over after the refresh, if any.
    user_symbol: Option<String>,
}

impl Request {
    /// Builds a refresh request from an existing table, carrying over its
    /// URL, playlist association and user-chosen symbol.
    pub fn new_for_table(table: &crate::Table) -> Self {
        let add_data = &table.1;
        Self {
            web_url: table.0.web_url.clone(),
            playlist_id: add_data.playlist_id,
            user_symbol: add_data.user_symbol.clone(),
        }
    }

    /// Locates the table this request refers to, matching either by web URL
    /// or by playlist id.
    pub fn find<'a>(&self, tables: &'a mut [crate::Table]) -> Option<&'a mut crate::Table> {
        tables.iter_mut().find(|candidate| {
            candidate.0.web_url.0 == self.web_url.0 || candidate.1.playlist_id == self.playlist_id
        })
    }

    /// Test-only constructor from a raw URL string; panics on an invalid URL.
    #[cfg(test)]
    fn new_new(web_url: impl Into<String>) -> Self {
        Self {
            playlist_id: None,
            user_symbol: None,
            web_url: web_url.into().try_into().unwrap(),
        }
    }
}

pub async fn fetch_table(
    client: &reqwest::Client,
    req: Request,
    now: crate::UnixEpochTs,
) -> Result<crate::Table> {
    let web_url = req.web_url;
    let playlist_id = req.playlist_id;
    let user_symbol = req.user_symbol;

    let web_page = fetch_url(client, &web_url)
        .await
        .with_context(|| format!("Failed to fetch table web page from {}", web_url.0))?;

    let (header, header_url) = fetch_table_detail_header_fetch(client, &web_url, &web_page)
        .await
        .context("failed to fetch table header")?;

    let header = fetch_table_detail_header_parse(&header_url, &header)
        .context("failed to parse table header")?;

    let entries = fetch_table_detail_data(client, &header.data_url)
        .await
        .context("failed to fetch or parse table data")?;

    Ok(crate::Table(
        crate::TableData {
            web_url,
            name: header.name,
            symbol: header.symbol,
            header_url,
            data_url: header.data_url,
            entries,
            folder_order: header.level_order,
        },
        crate::TableAddData {
            last_update: Some(now),
            playlist_id,
            user_symbol,
            edited_symbol: None,
            edited_url: None,
            pending_removal: false,
            being_updated: false,
            update_errors: vec![],
        },
    ))
}

/// Parsed table header fields needed to build a `crate::Table`.
struct TableHeader {
    // Human-readable table name.
    name: String,
    // Table symbol string as published by the header.
    symbol: String,
    // URL of the data (entries) JSON, already resolved against the header URL.
    data_url: crate::ResolvedUrl,
    // Display order of level folders; empty when the header omits it.
    level_order: Vec<String>,
}

/// Fetches `url` as text. `file://` URLs are read from the local filesystem;
/// everything else is downloaded through `client` with a 30s timeout.
async fn fetch_url(client: &reqwest::Client, url: &crate::ResolvedUrl) -> Result<String> {
    log::info!("getting url={}", url.0);
    match url.0.strip_prefix("file://") {
        // NOTE(review): blocking read inside an async fn — presumably fine for
        // occasional local files; confirm it never runs on a hot path.
        Some(path) => {
            std::fs::read_to_string(path).with_context(|| format!("failed to read file {path}"))
        }
        None => {
            let response = client
                .get(&url.0)
                .timeout(std::time::Duration::from_secs(30))
                .send()
                .await
                .with_context(|| format!("failed to get url {}", url.0))?;
            let body = response
                .text()
                .await
                .context("failed to decode downloaded data")?;
            log::debug!("fetched body: {body}");
            Ok(body)
        }
    }
}

/// Resolves and downloads the table header JSON.
///
/// If `web_page` contains a bmstable header link, follow it and fetch the
/// header; otherwise assume `web_url` already pointed directly at the header
/// and return the page contents as-is.
async fn fetch_table_detail_header_fetch(
    client: &reqwest::Client,
    web_url: &crate::ResolvedUrl,
    web_page: &str,
) -> Result<(String, crate::ResolvedUrl)> {
    // Guard: no embedded header link means the page itself is the header.
    let raw_header_url = match crate::parsing::extract_raw_header_url(web_page) {
        Ok(raw) => raw,
        Err(e) => {
            log::debug!(
                "Failed to extract table header URL, this is fine if this URL, {}, is already a header URL. Error: {e:?}",
                web_url.0
            );
            return Ok((web_page.to_string(), web_url.clone()));
        }
    };

    let header_url = web_url.resolve_json_url(raw_header_url).with_context(|| {
        format!(
            "Failed to resolve header URL from {} {}",
            web_url.0, raw_header_url
        )
    })?;
    let header = fetch_url(client, &header_url)
        .await
        .with_context(|| format!("Failed to fetch header from {}", header_url.0))?;
    Ok((header, header_url))
}

/// Parses a table header JSON and resolves its `data_url` against
/// `header_url`.
///
/// Accepts both `level_order` and its `folder_order` alias, and normalizes
/// the mixed number/string entries some tables use into strings.
fn fetch_table_detail_header_parse(
    header_url: &crate::ResolvedUrl,
    header: &str,
) -> Result<TableHeader> {
    /// `level_order` entries are sometimes numbers, sometimes strings
    /// (Dystopia uses a mix of both), so accept either.
    #[derive(Debug, serde::Deserialize)]
    #[serde(untagged)]
    enum NumOrString {
        Num(i64),
        String(String),
    }

    #[derive(Debug, serde::Deserialize)]
    struct TableHeaderForParsing {
        name: String,
        symbol: String,
        /// Relative to current URL. For example:
        /// `https://example.com/table/header.json`
        /// `score.json` would refer to `https://example.com/table/score.json`.
        data_url: String,
        /// Some tables publish this field as `folder_order` instead, so
        /// accept both names.
        #[serde(alias = "folder_order")]
        level_order: Option<Vec<NumOrString>>,
    }

    let header: TableHeaderForParsing = serde_json::from_str(header).with_context(|| {
        format!(
            "Failed to parse header, header_url={}, header={header}",
            header_url.0
        )
    })?;

    let data_url = header_url
        .resolve_json_url(&header.data_url)
        .with_context(|| {
            format!(
                "failed to resolve data_url from header_url={}, raw_data_url={}",
                header_url.0, &header.data_url
            )
        })?;

    // Normalize numeric folder labels to strings; missing field -> empty order.
    let level_order = header
        .level_order
        .unwrap_or_default()
        .into_iter()
        .map(|v| match v {
            NumOrString::Num(v) => v.to_string(),
            NumOrString::String(v) => v,
        })
        .collect();

    Ok(TableHeader {
        name: header.name,
        symbol: header.symbol,
        data_url,
        level_order,
    })
}

/// Downloads the table data JSON and converts it into deduplicated
/// `crate::TableEntry` values, dropping entries without a 32-char md5.
async fn fetch_table_detail_data(
    client: &reqwest::Client,
    data_url: &crate::ResolvedUrl,
) -> Result<Vec<crate::TableEntry>> {
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Deserialize)]
    struct JsonTableDataEntry {
        // Some new tables omit 'md5' and only include 'sha256', especially for bmson files.
        md5: Option<String>,
        level: String,
    }

    const MD5_LEN: usize = 32;

    let entries = fetch_url(client, data_url)
        .await
        .with_context(|| format!("Failed to fetch data from {}", data_url.0))?;

    let parsed = serde_json::from_str::<Vec<JsonTableDataEntry>>(&entries)
        .with_context(|| format!("Failed to parse data, data={entries}"))?;
    let orig_len = parsed.len();

    let mut good: Vec<_> = parsed
        .into_iter()
        .filter_map(|entry| {
            // Normal 2, Gachimijoy, Mini-jack tables, all have empty entries.
            let md5 = entry.md5?;
            (md5.len() == MD5_LEN).then(|| crate::TableEntry {
                md5,
                level: entry.level,
            })
        })
        .collect();

    // Some tables have completely duplicated entries which is against our DB schema constraint.
    good.sort();
    good.dedup();

    log::debug!(
        "{}: parsed {}/{} good and unique entries",
        data_url.0,
        good.len(),
        orig_len
    );
    Ok(good)
}

#[cfg(test)]
mod tests {
    use test_log::test;

    // Sentinel "last update" timestamp for tests.
    const NEVER: u64 = u64::MAX;

    /// Header parsing normalizes a mixed number/string `level_order`.
    #[test]
    fn unit_test_header_parse() {
        use crate::fetch::fetch_table_detail_header_parse;
        {
            let table = fetch_table_detail_header_parse(
            &"https://anything".try_into().unwrap(),
            "{\"name\":\"a\",\"symbol\":\"b\",\"data_url\":\"url_idc2\",\"level_order\":[0,1,2,3,4,5,6,7,8,9,10,\"\"]}").unwrap();
            assert_eq!(
                table.level_order,
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ""]
            );
        }
    }

    /// End-to-end fetch through a mock HTTP server, from either the web page
    /// URL or the header URL directly.
    #[test(tokio::test)]
    async fn fetches_table_from_web_page() {
        use super::{Request, fetch_table};
        use crate::TableEntry;

        let mut server = mockito::Server::new_async().await;
        let mock1 = server
            .mock("GET", "/bmstable/")
            .with_body(r#"<meta name="bmstable" content="header.json"/>"#)
            .create();
        let mock2 = server
            .mock("GET", "/bmstable/header.json")
            .with_body(
                r#"{
  "name" : "マイクールなテイブル",
  "symbol" : "草",
  "data_url" : "data"
}"#,
            )
            .create();
        let mock3 = server
            .mock("GET", "/bmstable/data")
            .with_body(
                // Also test deduplication, missing and empty md5
                r#"[
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "2", "md5": "" },
{ "level": "2", "md5": null },
{ "level": "2" },
{ "level": "1", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" }
                ]"#,
            )
            .create();
        let reqwest = reqwest::Client::new();

        let test = async |url: String| {
            let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
                .await
                .unwrap();

            assert_eq!(table.0.web_url.0, url);
            assert_eq!(table.0.name, "マイクールなテイブル");
            // BUGFIX: the mocked header publishes "草" as the symbol, and
            // fetch_table stores header.symbol verbatim — the old assertion
            // against "" could never pass.
            assert_eq!(table.0.symbol, "草");
            assert!(table.0.data_url.0.ends_with("/bmstable/data"));
            assert_eq!(
                table.0.entries,
                &[
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "0".to_string(),
                    },
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "1".to_string(),
                    },
                ]
            );
        };

        test(format!("http://{}/bmstable/", server.host_with_port())).await;
        // Can pass header URL directly.
        test(format!(
            "http://{}/bmstable/header.json",
            server.host_with_port()
        ))
        .await;

        // Web page is fetched once; header and data twice (once per run).
        mock1.assert();
        mock2.expect(2).assert();
        mock3.expect(2).assert();
    }

    /// End-to-end fetch through `file://` URLs, including error cases for
    /// unreadable paths.
    #[test(tokio::test)]
    async fn fetches_table_from_local_file() {
        use super::{Request, fetch_table};
        use crate::TableEntry;

        let reqwest = reqwest::Client::new();

        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file://C:\\a\\s\\d\\f"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file://C:\\a\\s\\d\\f"
        );

        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file:///no/such/file"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file:///no/such/file"
        );

        let tmp_dir = tempfile::tempdir().unwrap();
        let header_path = tmp_dir.path().join("hdr");
        std::fs::write(
            &header_path,
            r#"{ "name": "マイクールなテイブル", "symbol": "草", "data_url": "dt" }"#,
        )
        .unwrap();
        let data_path = tmp_dir.path().join("dt");
        std::fs::write(
            &data_path,
            r#"[ { "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" } ]"#,
        )
        .unwrap();

        let url = format!("file://{}", header_path.to_str().unwrap());

        let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
            .await
            .unwrap();

        assert_eq!(table.0.web_url.0, url);
        assert_eq!(table.0.name, "マイクールなテイブル");
        // BUGFIX: the on-disk header publishes "草" as the symbol; see
        // fetches_table_from_web_page for the same fix.
        assert_eq!(table.0.symbol, "草");
        assert_eq!(
            table.0.data_url.0,
            format!("file://{}", data_path.to_str().unwrap())
        );
        assert_eq!(
            table.0.entries,
            &[TableEntry {
                md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                level: "0".to_string(),
            },]
        );
    }
}