use anyhow::{Context, Result};
/// Identifies one table to fetch: the table's web page URL plus the
/// locally assigned playlist id, when the table already exists locally.
#[derive(Debug, Clone)]
pub struct Request {
    // Resolved URL of the table's web page (may also point directly at
    // the header JSON — see `fetch_table_detail_header_fetch`).
    web_url: crate::ResolvedUrl,
    // Playlist id of the existing local table, if any.
    playlist_id: Option<crate::PlaylistId>,
}
impl Request {
    /// Builds a fetch request from an existing table entry.
    pub fn new_for_table(table: &crate::Table) -> Self {
        let web_url = table.0.web_url.clone();
        let playlist_id = table.1.playlist_id;
        Self {
            web_url,
            playlist_id,
        }
    }

    /// Returns the index of the table this request refers to, matched by
    /// playlist id or by web URL.
    // NOTE(review): when both this request's and a table's playlist id are
    // `None`, the id comparison is trivially true and the first id-less
    // table wins regardless of URL — presumably callers never hit that
    // combination; confirm.
    pub fn position(&self, tables: &[crate::Table]) -> Option<usize> {
        tables.iter().position(|table| {
            table.1.playlist_id == self.playlist_id || table.0.web_url.0 == self.web_url.0
        })
    }

    /// The table's web page URL this request was created with.
    pub const fn web_url(&self) -> &crate::ResolvedUrl {
        &self.web_url
    }
}
/// Result of a completed [`fetch_table`] call: the parsed table data
/// together with the originating request (so callers can correlate
/// answers with what they asked for).
#[derive(Debug, Clone)]
pub struct RequestAnswer {
    // Fully parsed table contents.
    pub data: crate::TableData,
    // The request that produced this answer, returned unchanged.
    pub request: Request,
}
/// Downloads and parses a complete table in three steps:
/// web page → header JSON → data JSON.
///
/// # Errors
/// Fails with context if any of the three fetches or either parse step fails.
pub async fn fetch_table(client: reqwest::Client, req: Request) -> Result<RequestAnswer> {
    let web_url = req.web_url.clone();

    // Step 1: the table's web page (which may itself be the header JSON).
    let web_page = fetch_url(client.clone(), &web_url)
        .await
        .with_context(|| format!("Failed to fetch table web page from {}", web_url.0))?;

    // Step 2: locate, fetch and parse the header.
    let (raw_header, header_url) =
        fetch_table_detail_header_fetch(client.clone(), &web_url, &web_page)
            .await
            .context("failed to fetch table header")?;
    let header = fetch_table_detail_header_parse(&header_url, &raw_header)
        .context("failed to parse table header")?;

    // Step 3: the actual entry data, at the URL the header points to.
    let entries = fetch_table_detail_data(client, &header.data_url)
        .await
        .context("failed to fetch or parse table data")?;

    let data = crate::TableData {
        web_url,
        name: header.name,
        symbol: header.symbol,
        entries,
        folder_order: header.level_order,
    };
    Ok(RequestAnswer { data, request: req })
}
// Parsed table header, produced by `fetch_table_detail_header_parse`.
struct TableHeader {
    // Human-readable table name.
    name: String,
    // Short symbol/prefix for the table.
    symbol: String,
    // Resolved URL of the data JSON, derived from the header's `data_url`.
    data_url: crate::ResolvedUrl,
    // Level ordering from the header's `level_order`, with numeric
    // entries normalized to strings; empty if the field was absent.
    level_order: Vec<String>,
}
/// Fetches `url` as text. `file://` URLs are read from the local
/// filesystem; everything else goes through the HTTP client with a
/// 30-second timeout.
// NOTE(review): the file branch does a synchronous read inside an async
// fn — presumably fine for small local header/data files; confirm.
async fn fetch_url(client: reqwest::Client, url: &crate::ResolvedUrl) -> Result<String> {
    log::debug!("getting url={}", url.0);
    match url.0.strip_prefix("file://") {
        Some(path) => {
            std::fs::read_to_string(path).with_context(|| format!("failed to read file {path}"))
        }
        None => {
            let response = client
                .get(&url.0)
                .timeout(std::time::Duration::from_secs(30))
                .send()
                .await
                .with_context(|| format!("failed to get url {}", url.0))?;
            let body = response
                .text()
                .await
                .context("failed to decode downloaded data")?;
            log::debug!("fetched body: {body}");
            Ok(body)
        }
    }
}
/// Obtains the raw header JSON for a table together with the URL it was
/// loaded from. If a header URL can be extracted from the web page it is
/// resolved and fetched; otherwise the page itself is assumed to already
/// be the header JSON and is returned as-is.
async fn fetch_table_detail_header_fetch(
    client: reqwest::Client,
    web_url: &crate::ResolvedUrl,
    web_page: &str,
) -> Result<(String, crate::ResolvedUrl)> {
    // No extractable header link means the page is (probably) the header.
    let raw_header_url = match crate::parsing::extract_raw_header_url(web_page) {
        Ok(raw) => raw,
        Err(e) => {
            log::debug!(
                "failed to extract table header URL, this is fine if this URL, {}, is already a header URL",
                web_url.0
            );
            log::debug!("error was: {e:?}");
            return Ok((web_page.to_string(), web_url.clone()));
        }
    };

    let header_url = web_url.resolve_json_url(raw_header_url).with_context(|| {
        format!(
            "Failed to resolve header URL from {} {}",
            web_url.0, raw_header_url
        )
    })?;
    let header = fetch_url(client, &header_url)
        .await
        .with_context(|| format!("Failed to fetch header from {}", header_url.0))?;
    Ok((header, header_url))
}
/// Parses a header JSON document into a [`TableHeader`], resolving its
/// `data_url` relative to `header_url`. `level_order` entries may be JSON
/// numbers or strings; numbers are normalized to their decimal string
/// form, and a missing `level_order` yields an empty list.
fn fetch_table_detail_header_parse(
    header_url: &crate::ResolvedUrl,
    header: &str,
) -> Result<TableHeader> {
    // A level label that appears as either a JSON number or a string.
    #[derive(Debug, serde::Deserialize)]
    #[serde(untagged)]
    enum NumOrString {
        Num(i64),
        String(String),
    }

    // Raw wire format of the header document.
    #[derive(Debug, serde::Deserialize)]
    struct TableHeaderForParsing {
        name: String,
        symbol: String,
        data_url: String,
        level_order: Option<Vec<NumOrString>>,
    }

    let parsed: TableHeaderForParsing = serde_json::from_str(header).with_context(|| {
        format!(
            "Failed to parse header, header_url={}, header={header}",
            header_url.0
        )
    })?;

    let data_url = header_url
        .resolve_json_url(&parsed.data_url)
        .with_context(|| {
            format!(
                "failed to resolve data_url from header_url={}, raw_data_url={}",
                header_url.0, &parsed.data_url
            )
        })?;

    // Normalize every level label to a string.
    let mut level_order = Vec::new();
    if let Some(levels) = parsed.level_order {
        level_order.reserve(levels.len());
        for level in levels {
            level_order.push(match level {
                NumOrString::Num(n) => n.to_string(),
                NumOrString::String(s) => s,
            });
        }
    }

    Ok(TableHeader {
        name: parsed.name,
        symbol: parsed.symbol,
        data_url,
        level_order,
    })
}
/// Downloads the data JSON at `data_url` and returns the cleaned entry
/// list: entries whose `md5` is missing, null, or not exactly 32
/// characters are dropped, and the survivors are sorted and deduplicated.
async fn fetch_table_detail_data(
    client: reqwest::Client,
    data_url: &crate::ResolvedUrl,
) -> Result<Vec<crate::TableEntry>> {
    // Raw wire format of one data entry; `md5` may be absent or null.
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Deserialize)]
    struct JsonTableDataEntry {
        md5: Option<String>,
        level: String,
    }
    const MD5_LEN: usize = 32;

    let raw = fetch_url(client, data_url)
        .await
        .with_context(|| format!("Failed to fetch data from {}", data_url.0))?;
    let parsed = serde_json::from_str::<Vec<JsonTableDataEntry>>(&raw)
        .with_context(|| format!("Failed to parse data, data={raw}"))?;
    let orig_len = parsed.len();

    let mut entries: Vec<_> = parsed
        .into_iter()
        .filter_map(|entry| {
            // Keep only entries carrying a plausibly valid md5.
            let md5 = entry.md5.filter(|m| m.len() == MD5_LEN)?;
            Some(crate::TableEntry {
                md5,
                level: entry.level,
            })
        })
        .collect();
    entries.sort();
    entries.dedup();

    log::debug!(
        "{}: parsed {}/{} good and unique entries",
        data_url.0,
        entries.len(),
        orig_len
    );
    Ok(entries)
}
#[cfg(test)]
mod tests {
    // `test_log::test` wraps `#[test]`/`#[tokio::test]` so log output is
    // captured per test.
    use test_log::test;

    #[test]
    fn unit_test_header_parse() {
        use crate::fetch::fetch_table_detail_header_parse;
        {
            // Mixed numeric and string `level_order` entries are both
            // accepted and normalized to strings, preserving order.
            let table = fetch_table_detail_header_parse(
                &"https://anything".try_into().unwrap(),
                "{\"name\":\"a\",\"symbol\":\"b\",\"data_url\":\"url_idc2\",\"level_order\":[0,1,2,3,4,5,6,7,8,9,10,\"?\"]}").unwrap();
            assert_eq!(
                table.level_order,
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "?"]
            );
        }
    }

    // End-to-end fetch against a local mock server:
    // web page -> header JSON -> data JSON.
    #[test(tokio::test)]
    async fn fetches_table_from_web_page() {
        use super::{Request, fetch_table};
        use crate::TableEntry;
        let mut server = mockito::Server::new_async().await;
        // The web page advertises the header via a bmstable meta tag.
        let mock1 = server
            .mock("GET", "/bmstable/")
            .with_body(r#"<meta name="bmstable" content="header.json"/>"#)
            .create();
        let mock2 = server
            .mock("GET", "/bmstable/header.json")
            .with_body(
                r#"{
"name" : "マイクールなテイブル",
"symbol" : "草",
"data_url" : "data"
}"#,
            )
            .create();
        // Data deliberately includes a duplicate, an empty md5, a null
        // md5, and a missing md5 — only the two valid unique entries
        // should survive.
        let mock3 = server
            .mock("GET", "/bmstable/data")
            .with_body(
                r#"[
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "2", "md5": "" },
{ "level": "2", "md5": null },
{ "level": "2" },
{ "level": "1", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" }
]"#,
            )
            .create();
        let reqwest = reqwest::Client::new();
        // Shared assertion body, run once with the web page URL and once
        // with the header URL directly (exercising the fallback path).
        let test = async |url: String| {
            let table = fetch_table(
                reqwest.clone(),
                Request::new_for_table(&crate::Table::empty().with_url(&url)),
            )
            .await
            .unwrap()
            .data;
            assert_eq!(table.web_url.0, url);
            assert_eq!(table.name, "マイクールなテイブル");
            assert_eq!(table.symbol, "草");
            // Invalid/duplicate entries dropped; result sorted.
            assert_eq!(
                table.entries,
                &[
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "0".to_string(),
                    },
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "1".to_string(),
                    },
                ]
            );
        };
        test(format!("http://{}/bmstable/", server.host_with_port())).await;
        test(format!(
            "http://{}/bmstable/header.json",
            server.host_with_port()
        ))
        .await;
        // The web page is hit only by the first call; header and data are
        // hit once per call (twice in total).
        mock1.assert();
        mock2.expect(2).assert();
        mock3.expect(2).assert();
    }

    // `file://` URLs bypass HTTP and are read from disk.
    #[test(tokio::test)]
    async fn fetches_table_from_local_file() {
        use super::{Request, fetch_table};
        use crate::TableEntry;
        let request = |url| Request::new_for_table(&crate::Table::empty().with_url(url));
        let reqwest = reqwest::Client::new();
        // Unreadable paths surface as the outer "fetch table web page"
        // context message.
        assert_eq!(
            fetch_table(reqwest.clone(), request("file://C:\\a\\s\\d\\f"))
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file://C:\\a\\s\\d\\f"
        );
        assert_eq!(
            fetch_table(reqwest.clone(), request("file:///no/such/file"))
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file:///no/such/file"
        );
        // Happy path: header file pointing at a sibling data file, both
        // on local disk.
        let tmp_dir = tempfile::tempdir().unwrap();
        let header_path = tmp_dir.path().join("hdr");
        std::fs::write(
            &header_path,
            r#"{ "name": "マイクールなテイブル", "symbol": "草", "data_url": "dt" }"#,
        )
        .unwrap();
        let data_path = tmp_dir.path().join("dt");
        std::fs::write(
            &data_path,
            r#"[ { "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" } ]"#,
        )
        .unwrap();
        let url = format!("file://{}", header_path.to_str().unwrap());
        let table = fetch_table(reqwest, request(&url)).await.unwrap().data;
        assert_eq!(table.web_url.0, url);
        assert_eq!(table.name, "マイクールなテイブル");
        assert_eq!(table.symbol, "草");
        assert_eq!(
            table.entries,
            &[TableEntry {
                md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                level: "0".to_string(),
            },]
        );
    }
}