use anyhow::{Context, Result};
/// Input for [`fetch_table`]: the table's web URL plus optional user-side
/// metadata carried over into the freshly fetched table.
#[derive(Debug, Clone)]
pub struct Request {
    // URL of the table's web page — may also be a direct header-JSON URL or a
    // `file://` path (see `fetch_url`).
    web_url: String,
    // Playlist association copied from the existing table (see `new_for_table`).
    playlist_id: Option<crate::PlaylistId>,
    // User-overridden symbol copied from the existing table.
    user_symbol: Option<String>,
}
impl Request {
    /// Builds a refresh request from an existing table, carrying over its URL
    /// and the user-side metadata stored alongside it.
    pub fn new_for_table(table: &crate::Table) -> Self {
        let (data, extra) = (&table.0, &table.1);
        Self {
            web_url: data.web_url.0.clone(),
            playlist_id: extra.playlist_id,
            user_symbol: extra.user_symbol.clone(),
        }
    }

    /// Test-only constructor: a bare request with no playlist or user symbol.
    #[cfg(test)]
    fn new_new(web_url: impl Into<String>) -> Self {
        Request {
            web_url: web_url.into(),
            playlist_id: None,
            user_symbol: None,
        }
    }
}
pub async fn fetch_table(
client: &reqwest::Client,
req: Request,
now: crate::UnixEpochTs,
) -> Result<crate::Table> {
let web_url = req.web_url;
let playlist_id = req.playlist_id;
let user_symbol = req.user_symbol;
let web_page = fetch_url(client, &web_url)
.await
.with_context(|| format!("Failed to fetch table web page from {web_url}"))?;
let (header, header_url) = fetch_table_detail_header_fetch(client, &web_url, &web_page)
.await
.context("failed to fetch table header")?;
let header = fetch_table_detail_header_parse(&header_url, &header)
.context("failed to parse table header")?;
let entries = fetch_table_detail_data(client, &header.data_url)
.await
.context("failed to fetch or parse table data")?;
Ok(crate::Table(
crate::TableData {
web_url: crate::ResolvedUrl(web_url),
name: header.name,
symbol: header.symbol,
header_url: crate::ResolvedUrl(header_url),
data_url: crate::ResolvedUrl(header.data_url),
entries,
folder_order: header.level_order,
},
crate::TableAddData {
last_update: Some(now),
playlist_id,
user_symbol,
edited_symbol: None,
edited_url: None,
pending_removal: false,
},
))
}
// Parsed table header; intermediate result of `fetch_table_detail_header_parse`.
struct TableHeader {
    name: String,
    symbol: String,
    // Absolute data URL, already resolved against the header URL.
    data_url: String,
    // Folder/level ordering; numeric JSON entries are stringified.
    level_order: Vec<String>,
}
/// Fetches the contents of `url` as a string.
///
/// `file://` URLs are read from the local filesystem; everything else goes
/// through `client` with a 30-second per-request timeout.
///
/// # Errors
/// Fails if the file cannot be read, the request fails, the server returns a
/// non-success HTTP status, or the body cannot be decoded as text.
async fn fetch_url(client: &reqwest::Client, url: &str) -> Result<String> {
    log::info!("getting url={url}");
    if let Some(path) = url.strip_prefix("file://") {
        std::fs::read_to_string(path).with_context(|| format!("failed to read file {path}"))
    } else {
        let response = client
            .get(url)
            .timeout(std::time::Duration::from_secs(30))
            .send()
            .await
            .with_context(|| format!("failed to get url {url}"))?
            // Bug fix: previously a 404/500 response's error page was returned
            // as if it were valid data, producing a confusing JSON parse error
            // downstream. Reject non-success statuses here instead.
            .error_for_status()
            .with_context(|| format!("server returned error status for url {url}"))?;
        let body = response
            .text()
            .await
            .context("failed to decode downloaded data")?;
        log::debug!("fetched body: {body}");
        Ok(body)
    }
}
/// Locates and downloads the table's header JSON.
///
/// Extracts the header URL from `web_page`, resolves it against `web_url`, and
/// fetches it. If no header URL can be extracted, assumes `web_url` already
/// points at the header and returns `web_page` itself.
///
/// Returns `(header_body, header_url)`.
async fn fetch_table_detail_header_fetch(
    client: &reqwest::Client,
    web_url: &str,
    web_page: &str,
) -> Result<(String, String)> {
    // Guard: pages without an extractable header URL are treated as being the
    // header themselves.
    let raw_url = match crate::parsing::extract_raw_header_url(web_page) {
        Ok(raw_url) => raw_url,
        Err(e) => {
            log::debug!(
                "Failed to extract table header URL, this is fine if this URL, {web_url}, is already a header URL. Error: {e:?}"
            );
            return Ok((web_page.to_string(), web_url.to_string()));
        }
    };
    let header_url = crate::parsing::resolve_json_url(web_url, raw_url)
        .with_context(|| format!("Failed to resolve header URL from {web_url} {raw_url}"))?;
    let header = fetch_url(client, &header_url)
        .await
        .with_context(|| format!("Failed to fetch header from {header_url}"))?;
    Ok((header, header_url))
}
/// Parses the header JSON into a [`TableHeader`].
///
/// `level_order` entries may be JSON numbers or strings in the wild; both are
/// normalized to strings. The header's `data_url` is resolved against
/// `header_url` into an absolute URL.
fn fetch_table_detail_header_parse(header_url: &str, header: &str) -> Result<TableHeader> {
    // Accepts either a bare number or a string for a level label.
    #[derive(Debug, serde::Deserialize)]
    #[serde(untagged)]
    enum NumOrString {
        Num(i64),
        String(String),
    }
    // Wire format of the header; field names must match the JSON keys.
    #[derive(Debug, serde::Deserialize)]
    struct TableHeaderForParsing {
        name: String,
        symbol: String,
        data_url: String,
        level_order: Option<Vec<NumOrString>>,
    }

    let parsed: TableHeaderForParsing = serde_json::from_str(header).with_context(|| {
        format!("Failed to parse header, header_url={header_url}, header={header}")
    })?;

    let data_url =
        crate::parsing::resolve_json_url(header_url, &parsed.data_url).with_context(|| {
            format!(
                "failed to resolve data_url from header_url={}, raw_data_url={}",
                header_url, &parsed.data_url
            )
        })?;

    // Missing level_order is treated as empty; numbers become their decimal
    // string representation.
    let level_order: Vec<String> = match parsed.level_order {
        None => Vec::new(),
        Some(levels) => levels
            .into_iter()
            .map(|v| match v {
                NumOrString::Num(n) => n.to_string(),
                NumOrString::String(s) => s,
            })
            .collect(),
    };

    Ok(TableHeader {
        name: parsed.name,
        symbol: parsed.symbol,
        data_url,
        level_order,
    })
}
/// Fetches and parses the table's data JSON into clean entries.
///
/// Entries without an md5, or with an md5 that is not exactly 32 characters,
/// are dropped; the survivors are sorted and deduplicated.
async fn fetch_table_detail_data(
    client: &reqwest::Client,
    data_url: &str,
) -> Result<Vec<crate::TableEntry>> {
    // Wire format of one data entry; md5 may be absent or null.
    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, serde::Deserialize)]
    struct JsonTableDataEntry {
        md5: Option<String>,
        level: String,
    }
    // An md5 digest is 32 hex characters; anything else is junk.
    const MD5_LEN: usize = 32;

    let raw = fetch_url(client, data_url)
        .await
        .with_context(|| format!("Failed to fetch data from {data_url}"))?;
    let parsed = serde_json::from_str::<Vec<JsonTableDataEntry>>(&raw)
        .with_context(|| format!("Failed to parse data, data={raw}"))?;
    let total = parsed.len();

    let mut entries = Vec::new();
    for entry in parsed {
        let Some(md5) = entry.md5 else { continue };
        if md5.len() != MD5_LEN {
            continue;
        }
        entries.push(crate::TableEntry {
            md5,
            level: entry.level,
        });
    }
    entries.sort();
    entries.dedup();

    log::debug!(
        "{}: parsed {}/{} good and unique entries",
        data_url,
        entries.len(),
        total
    );
    Ok(entries)
}
#[cfg(test)]
mod tests {
    use test_log::test;
    // "Last update" timestamp used where the actual value does not matter.
    // NOTE(review): assumes crate::UnixEpochTs is u64 — confirmed by use below.
    const NEVER: u64 = u64::MAX;

    #[test]
    fn unit_test_header_parse() {
        use crate::fetch::fetch_table_detail_header_parse;
        {
            // level_order mixes JSON numbers and a string; all must come out
            // as strings, preserving order.
            let table = fetch_table_detail_header_parse(
                "https://anything",
                "{\"name\":\"a\",\"symbol\":\"b\",\"data_url\":\"url_idc2\",\"level_order\":[0,1,2,3,4,5,6,7,8,9,10,\"?\"]}").unwrap();
            assert_eq!(
                table.level_order,
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "?"]
            );
        }
    }

    #[test(tokio::test)]
    async fn fetches_table_from_web_page() {
        use super::{Request, fetch_table};
        use crate::TableEntry;
        let mut server = mockito::Server::new_async().await;
        // Web page pointing at the header JSON via the bmstable meta tag.
        let mock1 = server
            .mock("GET", "/bmstable/")
            .with_body(r#"<meta name="bmstable" content="header.json"/>"#)
            .create();
        // Header JSON with a relative data_url that must be resolved.
        let mock2 = server
            .mock("GET", "/bmstable/header.json")
            .with_body(
                r#"{
"name" : "マイクールなテイブル",
"symbol" : "草",
"data_url" : "data"
}"#,
            )
            .create();
        // Data containing duplicates and entries with missing/empty md5s,
        // all of which must be filtered down to two unique good entries.
        let mock3 = server
            .mock("GET", "/bmstable/data")
            .with_body(
                r#"[
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" },
{ "level": "2", "md5": "" },
{ "level": "2", "md5": null },
{ "level": "2" },
{ "level": "1", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" }
]"#,
            )
            .create();
        let reqwest = reqwest::Client::new();
        // Same assertions whether the entry point is the web page URL or the
        // header JSON URL directly.
        let test = async |url: String| {
            let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
                .await
                .unwrap();
            assert_eq!(table.0.web_url.0, url);
            assert_eq!(table.0.name, "マイクールなテイブル");
            assert_eq!(table.0.symbol, "草");
            assert!(table.0.data_url.0.ends_with("/bmstable/data"));
            assert_eq!(
                table.0.entries,
                &[
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "0".to_string(),
                    },
                    TableEntry {
                        md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                        level: "1".to_string(),
                    },
                ]
            );
        };
        test(format!("http://{}/bmstable/", server.host_with_port())).await;
        test(format!(
            "http://{}/bmstable/header.json",
            server.host_with_port()
        ))
        .await;
        // The web page is hit only by the first call; header and data are hit
        // once per call.
        mock1.assert();
        mock2.expect(2).assert();
        mock3.expect(2).assert();
    }

    #[test(tokio::test)]
    async fn fetches_table_from_local_file() {
        use super::{Request, fetch_table};
        use crate::TableEntry;
        let reqwest = reqwest::Client::new();
        // Unreadable/nonexistent file paths must surface as the outer
        // "Failed to fetch table web page" context.
        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file://C:\\a\\s\\d\\f"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file://C:\\a\\s\\d\\f"
        );
        assert_eq!(
            fetch_table(&reqwest, Request::new_new("file:///no/such/file"), NEVER)
                .await
                .unwrap_err()
                .to_string(),
            "Failed to fetch table web page from file:///no/such/file"
        );
        // Happy path: header and data read from a temp directory; the header's
        // relative data_url "dt" resolves to a sibling file:// URL.
        let tmp_dir = tempfile::tempdir().unwrap();
        let header_path = tmp_dir.path().join("hdr");
        std::fs::write(
            &header_path,
            r#"{ "name": "マイクールなテイブル", "symbol": "草", "data_url": "dt" }"#,
        )
        .unwrap();
        let data_path = tmp_dir.path().join("dt");
        std::fs::write(
            &data_path,
            r#"[ { "level": "0", "md5": "feedfeedfeedfeedfeedfeedfeedfeed" } ]"#,
        )
        .unwrap();
        let url = format!("file://{}", header_path.to_str().unwrap());
        let table = fetch_table(&reqwest, Request::new_new(&url), NEVER)
            .await
            .unwrap();
        assert_eq!(table.0.web_url.0, url);
        assert_eq!(table.0.name, "マイクールなテイブル");
        assert_eq!(table.0.symbol, "草");
        assert_eq!(
            table.0.data_url.0,
            format!("file://{}", data_path.to_str().unwrap())
        );
        assert_eq!(
            table.0.entries,
            &[TableEntry {
                md5: "feedfeedfeedfeedfeedfeedfeedfeed".to_string(),
                level: "0".to_string(),
            },]
        );
    }
}