#![cfg(feature = "scraper")]
pub mod reqwest;
use std::future::Future;
use anyhow::{Context, Result, anyhow};
use scraper::{ElementRef, Html, Selector};
use serde::de::DeserializeOwned;
use crate::{BmsTable, BmsTableInfo, BmsTableRaw};
/// Result of fetching a single table: the table together with a raw counterpart.
///
/// NOTE(review): `BmsTable` and `BmsTableRaw` are declared elsewhere in the crate;
/// presumably `raw` holds the unparsed payloads `table` was built from — confirm
/// against their definitions.
pub struct FetchedTable {
/// The fetched table.
pub table: BmsTable,
/// Raw counterpart of `table`.
pub raw: BmsTableRaw,
}
/// Result of fetching a list of tables.
///
/// NOTE(review): presumably `raw_json` is the JSON text `tables` was parsed from —
/// confirm against the `TableFetcher` implementations.
pub struct FetchedTableList {
/// One entry per listed table.
pub tables: Vec<BmsTableInfo>,
/// JSON text associated with the list.
pub raw_json: String,
}
/// Abstraction over a backend that can fetch a table or a table list from a URL.
///
/// Both methods return a `Send` future that may borrow `self` (the `'_` bound), so the
/// fetcher must outlive the returned future.
pub trait TableFetcher {
/// Fetches the table reachable from `web_url`.
///
/// The future resolves to the fetched table on success, or to an `anyhow` error.
/// NOTE(review): which kinds of URL are accepted (HTML page, header JSON, ...) is decided
/// by the implementor and is not visible from this trait.
fn fetch_table(
&self,
web_url: url::Url,
) -> impl Future<Output = Result<FetchedTable>> + Send + '_;
/// Fetches the table list reachable from `web_url`.
///
/// The future resolves to the fetched list on success, or to an `anyhow` error.
fn fetch_table_list(
&self,
web_url: url::Url,
) -> impl Future<Output = Result<FetchedTableList>> + Send + '_;
}
/// Outcome of inspecting a response body that may be either a header JSON document or an
/// HTML page pointing at one.
pub enum HeaderQueryContent<T> {
/// The body was not parseable as `T`; this is the location hint extracted from it as HTML.
/// The string is returned as found in the page (it is not resolved against any base URL here).
Url(String),
/// The body was parsed directly as `T`.
Value(T),
}
/// Returns a copy of `s` with every Unicode control character removed.
///
/// Despite the name, control characters are dropped rather than substituted. This
/// includes `\n`, `\r` and `\t`, not only the unprintable ones.
#[must_use]
pub fn replace_control_chars(s: &str) -> String {
    // A `fn(char) -> bool` is a valid string pattern; every match is replaced by nothing.
    s.replace(char::is_control, "")
}
/// Parses `raw` as JSON into `T`, retrying once with control characters stripped.
///
/// Returns the parsed value together with the exact text that was successfully parsed:
/// `raw` itself when the first attempt works, otherwise the sanitized copy.
///
/// # Errors
///
/// Returns the `serde_json` error of the second attempt when the sanitized text cannot be
/// parsed either. The error of the first attempt is discarded.
pub fn parse_json_str_with_fallback<T: DeserializeOwned>(raw: &str) -> Result<(T, String)> {
    if let Ok(value) = serde_json::from_str::<T>(raw) {
        return Ok((value, raw.to_string()));
    }

    // Stray control characters make otherwise valid JSON unparseable; drop them and retry.
    let sanitized = replace_control_chars(raw);
    let value = serde_json::from_str::<T>(&sanitized)?;
    Ok((value, sanitized))
}
/// Interprets a response body either as header JSON or as an HTML page hinting at one.
///
/// The body is first stripped of control characters and parsed as JSON into `T`. If that
/// fails, the *original* body is treated as HTML and searched for a header location via
/// [`try_extract_bmstable_from_html`].
///
/// # Errors
///
/// Returns an error (with the context "When extracting bmstable url") when the body is
/// neither valid JSON for `T` nor HTML containing a recognizable hint. The JSON parse error
/// itself is not reported.
pub fn get_web_header_json_value<T: DeserializeOwned>(
    response_str: &str,
) -> Result<HeaderQueryContent<T>> {
    let sanitized = replace_control_chars(response_str);
    if let Ok(header_json) = serde_json::from_str::<T>(&sanitized) {
        return Ok(HeaderQueryContent::Value(header_json));
    }

    try_extract_bmstable_from_html(response_str)
        .context("When extracting bmstable url")
        .map(HeaderQueryContent::Url)
}
/// Runs [`get_web_header_json_value`] on `raw`, retrying on a control-character-free copy.
///
/// Returns the query result together with the text the successful attempt was given:
/// `raw` for the first attempt, the sanitized copy for the second.
///
/// NOTE(review): `get_web_header_json_value` already strips control characters before its
/// JSON parse, so on a first-attempt success the returned string may still contain control
/// characters that were absent from the text actually parsed — confirm callers do not
/// re-parse it strictly.
///
/// # Errors
///
/// Returns the error of the second attempt when both attempts fail.
pub fn header_query_with_fallback<T: DeserializeOwned>(
    raw: &str,
) -> Result<(HeaderQueryContent<T>, String)> {
    if let Ok(content) = get_web_header_json_value::<T>(raw) {
        return Ok((content, raw.to_string()));
    }

    let sanitized = replace_control_chars(raw);
    let content = get_web_header_json_value::<T>(&sanitized)?;
    Ok((content, sanitized))
}
pub fn try_extract_bmstable_from_html(html_content: &str) -> Result<String> {
let document = Html::parse_document(html_content);
let meta_selector = Selector::parse("meta").map_err(|_| anyhow!("meta tag not found"))?;
let link_selector = Selector::parse("link").ok();
let a_selector = Selector::parse("a").ok();
let script_selector = Selector::parse("script").ok();
let find_attr = |selector: &Selector,
attr: &str,
keep: &mut dyn FnMut(&ElementRef<'_>, &str) -> bool|
-> Option<String> {
for element in document.select(selector) {
if let Some(value) = element.value().attr(attr)
&& keep(&element, value)
{
return Some(value.to_string());
}
}
None
};
let candidate = meta_bmstable(&document, &meta_selector)
.or_else(|| {
let mut keep = |element: &ElementRef<'_>, href: &str| {
element
.value()
.attr("rel")
.is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
&& !href.is_empty()
};
link_selector
.as_ref()
.and_then(|sel| find_attr(sel, "href", &mut keep))
})
.or_else(|| {
let mut keep = |_: &ElementRef<'_>, href: &str| contains_header_json(href);
a_selector
.as_ref()
.and_then(|sel| find_attr(sel, "href", &mut keep))
})
.or_else(|| {
let mut keep = |_: &ElementRef<'_>, href: &str| contains_header_json(href);
link_selector
.as_ref()
.and_then(|sel| find_attr(sel, "href", &mut keep))
})
.or_else(|| {
let mut keep = |_: &ElementRef<'_>, src: &str| contains_header_json(src);
script_selector
.as_ref()
.and_then(|sel| find_attr(sel, "src", &mut keep))
})
.or_else(|| {
let mut keep = |_: &ElementRef<'_>, content: &str| contains_header_json(content);
find_attr(&meta_selector, "content", &mut keep)
})
.or_else(|| {
find_header_json_in_text(html_content)
.map(|(start, end)| html_content[start..end].to_string())
});
candidate.map_or_else(
|| Err(anyhow!("bmstable field or header JSON hint not found")),
Ok,
)
}
/// Scans free text for something that looks like a path to a header JSON file.
///
/// A candidate is a run of characters free of quotes and whitespace that contains
/// "header" followed, within the same run, by ".json" (both matched ASCII
/// case-insensitively). Occurrences of "header" whose run has no ".json" — for example an
/// HTML `<header>` tag — are skipped and the scan continues with the next occurrence.
///
/// Returns the byte range `(start, end)` into `s` of the first candidate:
///
/// * `start` is just past the nearest quote or whitespace character before "header", or
///   the position of "header" itself when no such character precedes it;
/// * `end` is just past the first ".json" following "header".
///
/// Both offsets are guaranteed to lie on `char` boundaries of `s`, so `&s[start..end]`
/// never panics.
fn find_header_json_in_text(s: &str) -> Option<(usize, usize)> {
    const NEEDLE: &str = "header";
    const SUFFIX: &str = ".json";

    fn is_delimiter(c: char) -> bool {
        c == '"' || c == '\'' || c.is_whitespace()
    }

    // ASCII lowercasing keeps every byte offset identical to the original string.
    let lower = s.to_ascii_lowercase();
    let mut pos = 0;
    while let Some(idx) = lower[pos..].find(NEEDLE) {
        let header_at = pos + idx;
        pos = header_at + NEEDLE.len();

        // Only look for ".json" inside the token that contains this "header"; otherwise an
        // unrelated ".json" much later in the text would be glued onto it.
        let token_end = lower[header_at..]
            .find(is_delimiter)
            .map_or(lower.len(), |rel| header_at + rel);
        let Some(json_rel) = lower[header_at..token_end].find(SUFFIX) else {
            continue;
        };
        let end = header_at + json_rel + SUFFIX.len();

        // Step over the whole delimiter: whitespace such as U+3000 is several bytes long,
        // so adding 1 to its offset would land inside the character.
        let start = lower[..header_at]
            .char_indices()
            .rev()
            .find(|&(_, c)| is_delimiter(c))
            .map_or(header_at, |(i, c)| i + c.len_utf8());

        return Some((start, end));
    }
    None
}
/// Reports whether `s` looks like a header JSON path: it mentions "header" anywhere and
/// ends with ".json", both compared ASCII case-insensitively.
fn contains_header_json(s: &str) -> bool {
    let lowered = s.to_ascii_lowercase();
    if !lowered.ends_with(".json") {
        return false;
    }
    lowered.contains("header")
}
fn meta_bmstable(document: &Html, meta_selector: &Selector) -> Option<String> {
for element in document.select(meta_selector) {
let is_bmstable = element
.value()
.attr("name")
.is_some_and(|v| v.eq_ignore_ascii_case("bmstable"))
|| element
.value()
.attr("property")
.is_some_and(|v| v.eq_ignore_ascii_case("bmstable"));
if is_bmstable
&& let Some(content_attr) = element.value().attr("content")
&& !content_attr.is_empty()
{
return Some(content_attr.to_string());
}
}
None
}