//! lr2-oxytabler 0.2.1
//!
//! Table manager for Lunatic Rave 2.
use anyhow::{Context, Result};

/// Parameters for fetching (or re-fetching) a difficulty table.
///
/// Decoupled from the full `crate::Table` so that user-specific data
/// (`playlist_id`, `user_symbol`) can be carried across a refresh and
/// written back into the newly fetched table.
#[derive(Debug, Clone)]
pub struct Request {
    // URL of the table's web page (may also point directly at a header JSON,
    // or use a `file://` prefix for local files).
    web_url: String,
    // Playlist association to preserve in the refreshed table's add-data.
    playlist_id: Option<crate::PlaylistId>,
    // User-chosen symbol override to preserve in the refreshed table's add-data.
    user_symbol: Option<String>,
}

impl Request {
    /// Builds a refresh request from an existing table, carrying over its
    /// web URL and the user-specific fields (`playlist_id`, `user_symbol`).
    pub fn new_for_table(table: &crate::Table) -> Self {
        Self {
            web_url: table.0.web_url.0.clone(),
            playlist_id: table.1.playlist_id,
            user_symbol: table.1.user_symbol.clone(),
        }
    }
    /// Test-only constructor: a bare request with no user data attached.
    #[cfg(test)]
    fn new_new(web_url: impl Into<String>) -> Self {
        Self {
            web_url: web_url.into(),
            playlist_id: None,
            user_symbol: None,
        }
    }
}

pub async fn fetch_table(
    client: &reqwest::Client,
    req: Request,
    now: crate::UnixEpochTs,
) -> Result<crate::Table> {
    let web_url = req.web_url;
    let playlist_id = req.playlist_id;
    let user_symbol = req.user_symbol;

    let web_page = fetch_url(client, &web_url)
        .await
        .with_context(|| format!("Failed to fetch table web page from {web_url}"))?;

    let (header, header_url) = fetch_table_detail_header_fetch(client, &web_url, &web_page)
        .await
        .context("failed to fetch table header")?;

    let header = fetch_table_detail_header_parse(&header_url, &header)
        .context("failed to parse table header")?;

    let entries = fetch_table_detail_data(client, &header.data_url)
        .await
        .context("failed to fetch or parse table data")?;

    Ok(crate::Table(
        crate::TableData {
            web_url: crate::ResolvedUrl(web_url),
            name: header.name,
            symbol: header.symbol,
            header_url: crate::ResolvedUrl(header_url),
            data_url: crate::ResolvedUrl(header.data_url),
            entries,
            folder_order: header.level_order,
        },
        crate::TableAddData {
            last_update: Some(now),
            playlist_id,
            user_symbol,
            edited_symbol: None,
            edited_url: None,
            pending_removal: false,
        },
    ))
}

/// Parsed table header — the intermediate JSON between the web page and the
/// data file.
struct TableHeader {
    // Display name of the table, straight from the header JSON.
    name: String,
    // Table symbol string from the header JSON.
    symbol: String,
    /// Normalized URL.
    data_url: String,
    // Preferred ordering of level folders; empty when the header omits it.
    level_order: Vec<String>,
}

/// Fetches `url` as text. URLs with a `file://` prefix are read from the
/// local filesystem; everything else goes through `client` with a 30s timeout.
async fn fetch_url(client: &reqwest::Client, url: &str) -> Result<String> {
    log::info!("getting url={url}");
    let Some(path) = url.strip_prefix("file://") else {
        let response = client
            .get(url)
            .timeout(std::time::Duration::from_secs(30))
            .send()
            .await
            .with_context(|| format!("failed to get url {url}"))?;
        let body = response
            .text()
            .await
            .context("failed to decode downloaded data")?;
        log::debug!("fetched body: {body}");
        return Ok(body);
    };
    // NOTE(review): blocking read inside an async fn — presumably fine for the
    // small local-file case, but consider spawn_blocking if files grow large.
    std::fs::read_to_string(path).with_context(|| format!("failed to read file {path}"))
}

/// Obtains the header JSON text plus its resolved URL.
///
/// If a header URL can be extracted from `web_page` (e.g. a `bmstable` meta
/// tag), the header is fetched from there. Otherwise `web_page` is assumed to
/// already BE the header and is returned unchanged alongside `web_url`.
async fn fetch_table_detail_header_fetch(
    client: &reqwest::Client,
    web_url: &str,
    web_page: &str,
) -> Result<(String, String)> {
    let raw_url = match crate::parsing::extract_raw_header_url(web_page) {
        Ok(url) => url,
        Err(e) => {
            // Not a wrapper page — treat the page itself as the header.
            log::debug!(
                "Failed to extract table header URL, this is fine if this URL, {web_url}, is already a header URL. Error: {e:?}"
            );
            return Ok((web_page.to_string(), web_url.to_string()));
        }
    };
    let header_url = crate::parsing::resolve_json_url(web_url, raw_url)
        .with_context(|| format!("Failed to resolve header URL from {web_url} {raw_url}"))?;
    let header = fetch_url(client, &header_url)
        .await
        .with_context(|| format!("Failed to fetch header from {header_url}"))?;
    Ok((header, header_url))
}

/// Parses the header JSON (fetched from `header_url`) into a `TableHeader`,
/// resolving the header's relative `data_url` against `header_url`.
fn fetch_table_detail_header_parse(header_url: &str, header: &str) -> Result<TableHeader> {
    /// Level names appear both as JSON numbers and strings in the wild
    /// (Dystopia mixes the two).
    #[derive(Debug, serde::Deserialize)]
    #[serde(untagged)]
    enum NumOrString {
        Num(i64),
        String(String),
    }

    impl NumOrString {
        // Normalize either JSON representation to its string form.
        fn into_string(self) -> String {
            match self {
                NumOrString::Num(n) => n.to_string(),
                NumOrString::String(s) => s,
            }
        }
    }

    #[derive(Debug, serde::Deserialize)]
    struct TableHeaderForParsing {
        name: String,
        symbol: String,
        /// Relative to current URL. For example:
        /// `https://example.com/table/header.json`
        /// `score.json` would refer to `https://example.com/table/score.json`.
        data_url: String,
        /// Some tables name this `folder_order` instead; those are
        /// deliberately not supported, which is fine as the field is optional.
        level_order: Option<Vec<NumOrString>>,
    }

    let parsed: TableHeaderForParsing = serde_json::from_str(header).with_context(|| {
        format!("Failed to parse header, header_url={header_url}, header={header}")
    })?;

    let data_url = crate::parsing::resolve_json_url(header_url, &parsed.data_url)
        .with_context(|| {
            format!(
                "failed to resolve data_url from header_url={header_url}, raw_data_url={}",
                parsed.data_url
            )
        })?;

    let level_order: Vec<String> = parsed
        .level_order
        .unwrap_or_default()
        .into_iter()
        .map(NumOrString::into_string)
        .collect();

    Ok(TableHeader {
        name: parsed.name,
        symbol: parsed.symbol,
        data_url,
        level_order,
    })
}

/// Downloads the table's data JSON and converts it into clean entries.
///
/// Entries whose md5 is missing or not exactly 32 characters are dropped,
/// and exact duplicates are removed to satisfy the DB schema constraint.
async fn fetch_table_detail_data(
    client: &reqwest::Client,
    data_url: &str,
) -> Result<Vec<crate::TableEntry>> {
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Deserialize)]
    struct JsonTableDataEntry {
        // Some new tables omit 'md5' and only include 'sha256', especially for bmson files.
        md5: Option<String>,
        level: String,
    }

    const MD5_LEN: usize = 32;

    let body = fetch_url(client, data_url)
        .await
        .with_context(|| format!("Failed to fetch data from {data_url}"))?;
    let raw = serde_json::from_str::<Vec<JsonTableDataEntry>>(&body)
        .with_context(|| format!("Failed to parse data, data={body}"))?;
    let total = raw.len();

    let mut entries: Vec<_> = raw
        .into_iter()
        .filter_map(|entry| match entry.md5 {
            // Normal 2, Gachimijoy, Mini-jack tables, all have empty entries.
            Some(md5) if md5.len() == MD5_LEN => Some(crate::TableEntry {
                md5,
                level: entry.level,
            }),
            _ => None,
        })
        .collect();

    // Some tables have completely duplicated entries which is against our DB schema constraint.
    entries.sort();
    entries.dedup();
    log::debug!(
        "{}: parsed {}/{} good and unique entries",
        data_url,
        entries.len(),
        total
    );
    Ok(entries)
}

#[cfg(test)]
mod tests {
    use test_log::test;

    // Timestamp sentinel — these tests don't care about `last_update`.
    const NEVER: u64 = u64::MAX;

    /// Header parsing accepts mixed numeric/string level_order entries and
    /// normalizes them all to strings.
    #[test]
    fn unit_test_header_parse() {
        use crate::fetch::fetch_table_detail_header_parse;
        {
            let table = fetch_table_detail_header_parse(
            "https://anything",
            "{\"name\":\"a\",\"symbol\":\"b\",\"data_url\":\"url_idc2\",\"level_order\":[0,1,2,3,4,5,6,7,8,9,10,\"\"]}").unwrap();
            assert_eq!(
                table.level_order,
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ""]
            );
        }
    }

    /// End-to-end fetch through a mock HTTP server, both from a wrapper web
    /// page and directly from the header URL.
    #[test(tokio::test)]
    async fn fetches_table_from_web_page() {
        use super::{Request, fetch_table};
        use crate::TableEntry;

        let mut server = mockito::Server::new_async().await;
        let mock1 = server
            .mock("GET", "/bmstable/")
            .with_body(r#"<meta name="bmstable" content="header.json"/>"#)
            .create();
        let mock2 = server
            .mock("GET", "/bmstable/header.json")
            .with_body(
                r#"{
  "name" : "マイクールなテイブル",
  "symbol" : "草",
  "data_url" : "data"
}"#,
            )
            .create();
        let mock3 = server
            .mock("GET", "/bmstable/data")
            .with_body(
                // Also test deduplication, missing and empty md5
                r#"[
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "2", "md5": "" },
{ "level": "2", "md5": null },
{ "level": "2" },
{ "level": "1", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" }
                ]"#,
            )
            .create();
        let reqwest = reqwest::Client::new();

        let test = async |url: String| {
            let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
                .await
                .unwrap();

            assert_eq!(table.0.web_url.0, url);
            assert_eq!(table.0.name, "マイクールなテイブル");
            // BUGFIX: the symbol must match the mock header's "symbol" field;
            // it was previously asserted to be the empty string.
            assert_eq!(table.0.symbol, "草");
            assert!(table.0.data_url.0.ends_with("/bmstable/data"));
            assert_eq!(
                table.0.entries,
                &[
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "0".to_string(),
                    },
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "1".to_string(),
                    },
                ]
            );
        };

        test(format!("http://{}/bmstable/", server.host_with_port())).await;
        // Can pass header URL directly.
        test(format!(
            "http://{}/bmstable/header.json",
            server.host_with_port()
        ))
        .await;

        mock1.assert();
        mock2.expect(2).assert();
        mock3.expect(2).assert();
    }

    /// Fetch via `file://` URLs: error messages on unreadable paths, plus a
    /// full successful round-trip through temp files.
    #[test(tokio::test)]
    async fn fetches_table_from_local_file() {
        use super::{Request, fetch_table};
        use crate::TableEntry;

        let reqwest = reqwest::Client::new();

        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file://C:\\a\\s\\d\\f"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file://C:\\a\\s\\d\\f"
        );

        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file:///no/such/file"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file:///no/such/file"
        );

        let tmp_dir = tempfile::tempdir().unwrap();
        let header_path = tmp_dir.path().join("hdr");
        std::fs::write(
            &header_path,
            r#"{ "name": "マイクールなテイブル", "symbol": "草", "data_url": "dt" }"#,
        )
        .unwrap();
        let data_path = tmp_dir.path().join("dt");
        std::fs::write(
            &data_path,
            r#"[ { "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" } ]"#,
        )
        .unwrap();

        let url = format!("file://{}", header_path.to_str().unwrap());

        let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
            .await
            .unwrap();

        assert_eq!(table.0.web_url.0, url);
        assert_eq!(table.0.name, "マイクールなテイブル");
        // BUGFIX: the symbol must match the written header's "symbol" field;
        // it was previously asserted to be the empty string.
        assert_eq!(table.0.symbol, "草");
        assert_eq!(
            table.0.data_url.0,
            format!("file://{}", data_path.to_str().unwrap())
        );
        assert_eq!(
            table.0.entries,
            &[TableEntry {
                md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                level: "0".to_string(),
            },]
        );
    }
}