use crate::url::ResolvedUrl;
use anyhow::{Context as _, Result};
use std::collections::HashMap;
/// Stable reference to a table, keyed by the table's web URL rather than
/// its position, so it can re-find the table in a (possibly reordered)
/// slice via [`TableLocator::locate`].
#[derive(Debug, Clone)]
pub struct TableLocator {
    // The URL the table was loaded from; used as the identity key.
    web_url: ResolvedUrl,
}
impl TableLocator {
    /// Builds a locator that remembers `table`'s web URL.
    #[must_use]
    pub fn new_for_table(table: &crate::table::Table) -> Self {
        let web_url = table.0.web_url.clone();
        Self { web_url }
    }

    /// Returns the index in `tables` of the table whose web URL matches the
    /// remembered one, or `None` when no such table is present.
    #[must_use]
    pub fn locate(&self, tables: &[crate::table::Table]) -> Option<usize> {
        tables
            .iter()
            .enumerate()
            .find_map(|(index, table)| (table.0.web_url == self.web_url).then_some(index))
    }

    /// The web URL this locator identifies its table by.
    #[must_use]
    pub const fn web_url(&self) -> &ResolvedUrl {
        &self.web_url
    }
}
impl std::fmt::Display for TableLocator {
    /// Renders as `<url> id=None` (the literal suffix is part of the
    /// existing textual form and is kept unchanged).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} id=None", self.web_url.as_str())
    }
}
/// Fetches and parses the difficulty table rooted at `web_url`.
///
/// The page at `web_url` is fetched first. If it contains a
/// `<meta name="bmstable" content="...">` tag, the referenced header JSON is
/// fetched (resolved relative to `web_url`); otherwise the page itself is
/// treated as the header. The header's `data_url` is then fetched and parsed,
/// and the resulting entries are sorted and deduplicated.
///
/// Returns the table data together with a map from chart md5 to song title
/// (titles found in the data JSON).
///
/// # Errors
/// Fails when any fetch fails, or when the header or data JSON cannot be
/// parsed.
pub async fn fetch_table(
    client: reqwest::Client,
    web_url: ResolvedUrl,
) -> Result<(crate::table::Data, HashMap<String, String>)> {
    let web_page = fetch_url(client.clone(), &web_url)
        .await
        .context("failed to fetch table web page")?;
    let (header_url, header) = match extract_raw_header_url(&web_page) {
        Ok(raw_header_url) => {
            let header_url = web_url
                .with_last_segment(raw_header_url)
                .context("failed to resolve header URL")?;
            let header = fetch_url(client.clone(), &header_url)
                .await
                .context("failed to fetch table header")?;
            (header_url, header)
        }
        Err(e) => {
            log::debug!("failed to find header URL in the web page, treating it itself as header");
            log::debug!("error was: {e:?}");
            // `web_page` is not used past this point, so move it instead of
            // cloning (the previous `web_page.clone()` was a redundant copy).
            (web_url.clone(), web_page)
        }
    };
    let header = parse_header(&header_url, &header).context("failed to parse table header")?;
    // Last use of `client`: move it instead of cloning.
    let entries = fetch_url(client, &header.data_url)
        .await
        .context("failed to fetch table data")?;
    let (mut entries, song_titles) = parse_data(&entries).context("failed to parse table data")?;
    let orig_len = entries.len();
    entries.sort();
    entries.dedup();
    log::debug!(
        "{}: parsed {}/{} good and unique entries",
        header.data_url.as_str(),
        entries.len(),
        orig_len
    );
    Ok((
        crate::table::Data {
            web_url,
            name: header.name,
            symbol: header.symbol,
            entries,
            folder_order: header.level_order,
        },
        song_titles,
    ))
}
/// Table header fields extracted from the header JSON by [`parse_header`].
#[derive(PartialEq, Debug)]
struct TableHeader {
    // Display name of the table (header's "name" field).
    name: String,
    // Symbol string from the header's "symbol" field; semantics are up to
    // consumers of `crate::table::Data`.
    symbol: String,
    // Data-JSON URL, already resolved against the header URL.
    data_url: ResolvedUrl,
    // Stringified "level_order" entries; empty when the header omits them.
    level_order: Vec<String>,
}
/// Downloads `url` as text.
///
/// `file://` URLs are read from the local filesystem (the scheme prefix is
/// stripped and the remainder treated as a path); everything else goes
/// through `client` with a 30-second timeout.
async fn fetch_url(client: reqwest::Client, url: &ResolvedUrl) -> Result<String> {
    log::debug!("getting url={}", url.as_str());
    match url.as_str().strip_prefix("file://") {
        Some(url) => {
            tokio::fs::read_to_string(url)
                .await
                .with_context(|| format!("failed to read file {url}"))
        }
        None => {
            let response = client
                .get(url.as_str())
                .timeout(std::time::Duration::from_secs(30))
                .send()
                .await
                .with_context(|| format!("failed to get url {}", url.as_str()))?;
            let body = response
                .text()
                .await
                .context("failed to decode downloaded data")?;
            log::debug!("fetched body: {body}");
            Ok(body)
        }
    }
}
/// Extracts the header URL from an HTML page by locating the literal
/// `<meta name="bmstable" content="` prefix and taking everything up to the
/// next `"`. Attribute order and spelling must match the literal exactly.
fn extract_raw_header_url(html: &str) -> Result<&str> {
    const META_PREFIX: &str = r#"<meta name="bmstable" content=""#;
    let value_start =
        html.find(META_PREFIX).context("missing bmstable meta")? + META_PREFIX.len();
    let rest = &html[value_start..];
    let value_end = rest.find('"').context("missing bmstable meta rhs")?;
    Ok(&rest[..value_end])
}
/// JSON scalar that tables serialize inconsistently as either a number or a
/// string (e.g. levels). `#[serde(untagged)]` makes serde try the variants
/// in declaration order: number first, then string.
#[derive(Debug, serde::Deserialize)]
#[serde(untagged)]
enum NumOrString {
    Num(i64),
    String(String),
}
/// Parses the header JSON into a [`TableHeader`], resolving `data_url`
/// relative to `header_url` and stringifying `level_order` entries
/// (which tables write as either numbers or strings).
fn parse_header(header_url: &ResolvedUrl, header: &str) -> Result<TableHeader> {
    // Wire format of the header; `level_order` is optional.
    #[derive(Debug, serde::Deserialize)]
    struct TableHeaderForParsing {
        name: String,
        symbol: String,
        data_url: String,
        level_order: Option<Vec<NumOrString>>,
    }
    let parsed: TableHeaderForParsing = serde_json::from_str(header).with_context(|| {
        format!(
            "Failed to parse header, header_url={}, header={header}",
            header_url.as_str()
        )
    })?;
    let data_url = header_url
        .with_last_segment(&parsed.data_url)
        .with_context(|| {
            format!(
                "failed to resolve data_url from header_url={}, raw_data_url={}",
                header_url.as_str(),
                parsed.data_url
            )
        })?;
    // Normalize every ordering entry to a string, preserving order.
    let mut level_order = Vec::new();
    for value in parsed.level_order.unwrap_or_default() {
        level_order.push(match value {
            NumOrString::String(s) => s,
            NumOrString::Num(n) => n.to_string(),
        });
    }
    Ok(TableHeader {
        name: parsed.name,
        symbol: parsed.symbol,
        data_url,
        level_order,
    })
}
/// Parses the data JSON into table entries plus an md5 → title map.
///
/// Rows without an md5, or whose md5 is not exactly 32 characters, are
/// skipped entirely (their titles are skipped too). Later titles for the
/// same md5 overwrite earlier ones.
fn parse_data(data: &str) -> Result<(Vec<crate::table::Entry>, HashMap<String, String>)> {
    // Wire format of one data row; unknown fields are ignored by serde.
    #[derive(Debug, serde::Deserialize)]
    struct JsonTableDataEntry {
        md5: Option<String>,
        level: NumOrString,
        title: Option<String>,
    }
    const MD5_LEN: usize = 32;
    let rows: Vec<JsonTableDataEntry> = serde_json::from_str(data)
        .with_context(|| format!("Failed to parse data, data={data}"))?;
    let mut entries = Vec::new();
    let mut song_titles = HashMap::new();
    for row in rows {
        let Some(md5) = row.md5 else { continue };
        if md5.len() != MD5_LEN {
            continue;
        }
        if let Some(title) = row.title {
            song_titles.insert(md5.clone(), title);
        }
        let level = match row.level {
            NumOrString::Num(n) => n.to_string(),
            NumOrString::String(s) => s,
        };
        entries.push(crate::table::Entry { md5, level });
    }
    Ok((entries, song_titles))
}
#[cfg(test)]
mod tests {
    use test_log::test;

    #[test(tokio::test)]
    async fn fetches_file_url() {
        use super::fetch_url;
        use crate::url::ResolvedUrl;
        let reqwest = reqwest::Client::new();
        assert_eq!(
            fetch_url(
                reqwest.clone(),
                &ResolvedUrl::try_from("file://C:\\no-such-file").unwrap()
            )
            .await
            .unwrap_err()
            .to_string(),
            "failed to read file C:\\no-such-file"
        );
        assert_eq!(
            fetch_url(
                reqwest.clone(),
                &ResolvedUrl::try_from("file:///no-such-file").unwrap()
            )
            .await
            .unwrap_err()
            .to_string(),
            "failed to read file /no-such-file"
        );
        let tmp_dir = tempfile::tempdir().unwrap();
        let tmp_dir = tmp_dir.path();
        let tmp_file = tmp_dir.join("a-file");
        std::fs::write(&tmp_file, "some data").unwrap();
        // Reading a directory (not a regular file) must fail.
        assert_eq!(
            fetch_url(
                reqwest.clone(),
                &ResolvedUrl::try_from(format!("file://{}", tmp_dir.to_str().unwrap())).unwrap(),
            )
            .await
            .unwrap_err()
            .to_string(),
            format!("failed to read file {}", tmp_dir.to_str().unwrap())
        );
        assert_eq!(
            fetch_url(
                reqwest.clone(),
                &ResolvedUrl::try_from(format!("file://{}", tmp_file.to_str().unwrap())).unwrap(),
            )
            .await
            .unwrap(),
            "some data"
        );
    }

    #[test]
    fn extracts_header_url() {
        use super::extract_raw_header_url;
        assert_eq!(
            extract_raw_header_url(
                r#"<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head>
<meta name"blahblah" /><meta name="bmstable" content="header.json" />"#
            )
            .ok(),
            Some("header.json")
        );
        assert_eq!(
            extract_raw_header_url(r#"<meta name="bmstable" content="header.json"/>"#).ok(),
            Some("header.json")
        );
        assert_eq!(
            format!("{}", extract_raw_header_url("").unwrap_err()),
            "missing bmstable meta"
        );
        assert_eq!(
            format!(
                "{}",
                extract_raw_header_url(r#"<meta name="bmstable" content="header.json"#)
                    .unwrap_err()
            ),
            "missing bmstable meta rhs"
        );
        // BUGFIX: this case previously fed `<meta name="bmstable"
        // content="header.json" />` — exactly the form the extractor accepts
        // (see the successful cases above) — so `unwrap_err()` could never
        // succeed. Swapping the attribute order exercises the real
        // limitation: the extractor searches for a literal prefix and is
        // attribute-order sensitive.
        assert_eq!(
            format!(
                "{}",
                extract_raw_header_url(r#"<meta content="header.json" name="bmstable" />"#)
                    .unwrap_err()
            ),
            "missing bmstable meta"
        );
    }

    #[test]
    fn parses_data() {
        use super::parse_data;
        use crate::table::Entry;
        use std::collections::HashMap;
        assert_eq!(
            parse_data("").unwrap_err().to_string(),
            "Failed to parse data, data="
        );
        assert_eq!(
            parse_data("{}").unwrap_err().to_string(),
            "Failed to parse data, data={}"
        );
        // Rows with missing/empty/short md5 are skipped; duplicate md5s keep
        // both entries but the later title wins in the title map.
        assert_eq!(
            parse_data(
                r#"
                [
                    {
                        "level": "1",
                        "md5": "feedfeedfeedfeedfeedfeedfeed0000",
                        "title": "title [subtitle]",
                        "artist": "ignored"
                    },
                    {
                        "level": "1",
                        "md5": "feedfeedfeedfeedfeedfeedfeed0000",
                        "title": "lol another title for the same song [subtitle]",
                        "artist": "ignored"
                    },
                    {
                        "level": "1",
                        "md5": ""
                    },
                    {
                        "level": "1",
                        "md5": null
                    },
                    {
                        "level": "1"
                    },
                    {
                        "level": "1",
                        "md5": "bogus"
                    }
                ]
                "#,
            )
            .unwrap(),
            (
                Vec::from([
                    Entry {
                        md5: "feedfeedfeedfeedfeedfeedfeed0000".to_string(),
                        level: "1".to_string()
                    },
                    Entry {
                        md5: "feedfeedfeedfeedfeedfeedfeed0000".to_string(),
                        level: "1".to_string()
                    }
                ]),
                HashMap::from([(
                    "feedfeedfeedfeedfeedfeedfeed0000".to_string(),
                    "lol another title for the same song [subtitle]".to_string(),
                )])
            )
        );
    }

    #[test]
    fn parses_header() {
        use super::{TableHeader, parse_header};
        use crate::url::ResolvedUrl;
        let dummy_url = &"https://anything".try_into().unwrap();
        // `level_order` mixes numbers and strings; both are stringified.
        assert_eq!(
            parse_header(
                dummy_url,
                "{\"name\":\"a\",\"symbol\":\"b\",\"data_url\":\"url_idc2\",\"level_order\":[0,1,2,3,4,5,6,7,8,9,10,\"?\"]}").unwrap(),
            TableHeader {
                name: "a".to_string(),
                symbol: "b".to_string(),
                data_url: ResolvedUrl::try_from("https://url_idc2").unwrap(),
                level_order: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "?"]
                    .iter()
                    .map(ToString::to_string)
                    .collect()
            }
        );
        assert_eq!(
            parse_header(dummy_url, "bogus").unwrap_err().to_string(),
            "Failed to parse header, header_url=https://anything, header=bogus"
        );
        assert_eq!(
            parse_header(dummy_url, "{}").unwrap_err().to_string(),
            "Failed to parse header, header_url=https://anything, header={}"
        );
    }

    #[test(tokio::test)]
    async fn fetches_table_from_web_page() {
        use super::fetch_table;
        use crate::{table::Entry, url::ResolvedUrl};
        let mut server = mockito::Server::new_async().await;
        let mock1 = server
            .mock("GET", "/bmstable/")
            .with_body(r#"<meta name="bmstable" content="header.json"/>"#)
            .create();
        let mock2 = server
            .mock("GET", "/bmstable/header.json")
            .with_body(
                r#"{
                    "name" : "マイクールなテイブル",
                    "symbol" : "草",
                    "data_url" : "data"
                }"#,
            )
            .create();
        let mock3 = server
            .mock("GET", "/bmstable/data")
            .with_body(
                r#"[
                    { "level": 0, "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
                    { "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
                    { "level": "1", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" }
                ]"#,
            )
            .create();
        let reqwest = reqwest::Client::new();
        // Fetching via the web page and via the header URL directly must
        // yield the same table; the duplicate level-0 row is deduplicated.
        let test = async |url: String| {
            let table = fetch_table(reqwest.clone(), ResolvedUrl::try_from(url.clone()).unwrap())
                .await
                .unwrap()
                .0;
            assert_eq!(table.web_url.as_str(), url);
            assert_eq!(table.name, "マイクールなテイブル");
            assert_eq!(table.symbol, "草");
            assert_eq!(
                table.entries,
                &[
                    Entry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "0".to_string(),
                    },
                    Entry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "1".to_string(),
                    },
                ]
            );
        };
        test(format!("http://{}/bmstable/", server.host_with_port())).await;
        test(format!(
            "http://{}/bmstable/header.json",
            server.host_with_port()
        ))
        .await;
        mock1.assert();
        mock2.expect(2).assert();
        mock3.expect(2).assert();
    }

    #[test]
    fn locator_on_several_new_tables() {
        use super::TableLocator;
        use crate::table::Table;
        let tables = &[
            Table::empty().with_url("https://1"),
            Table::empty().with_url("https://2"),
        ];
        assert_eq!(
            TableLocator::new_for_table(&tables[0])
                .locate(tables)
                .unwrap(),
            0
        );
        assert_eq!(
            TableLocator::new_for_table(&tables[1])
                .locate(tables)
                .unwrap(),
            1
        );
    }
}