bms_table/fetch/
reqwest.rs

1//! Network fetching module based on `reqwest`
2//!
3//! Provides an all-in-one ability to fetch and parse BMS difficulty tables from a web page or a header JSON source:
4//! - Fetch the page and extract the bmstable header URL from HTML (if present);
5//! - Download and parse the header JSON;
6//! - Download and parse chart data according to `data_url` in the header;
7//! - Return a `BmsTable` containing the header and the chart set.
8//!
9//! # Example
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::{fetch_table, make_lenient_client};
15//! let client = make_lenient_client()?;
16//! let table = fetch_table(&client, "https://stellabms.xyz/sl/table.html").await?;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22use std::collections::BTreeMap;
23
24use anyhow::{Result, anyhow};
25use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
26use serde_json::Value;
27use std::time::Duration;
28use url::Url;
29
30use crate::{BmsTable, BmsTableInfo, BmsTableRaw, fetch::replace_control_chars};
31
32/// Fetch and parse a complete BMS difficulty table from a web page or a header JSON source.
33///
34/// # Parameters
35///
36/// - `web_url`: page URL or an URL pointing directly to the header JSON.
37///
38/// # Returns
39///
40/// Parsed [`crate::BmsTable`], containing header and chart data.
41///
42/// # Errors
43///
44/// - Network request failures (connection failure, timeout, etc.)
45/// - Response content cannot be parsed as HTML/JSON or structure is unexpected
46/// - Header JSON does not contain `data_url` or has the wrong type
47pub async fn fetch_table_full(
48    client: &reqwest::Client,
49    web_url: &str,
50) -> Result<(BmsTable, BmsTableRaw)> {
51    let web_url = Url::parse(web_url)?;
52    let web_response = client
53        .get(web_url.clone())
54        .send()
55        .await
56        .map_err(|e| anyhow!("When fetching web: {e}"))?
57        .text()
58        .await
59        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
60    let (header_url, header_json, header_raw) =
61        match crate::fetch::get_web_header_json_value(&web_response)? {
62            crate::fetch::HeaderQueryContent::Url(header_url_string) => {
63                let header_url = web_url.join(&header_url_string)?;
64                let header_response = client
65                    .get(header_url.clone())
66                    .send()
67                    .await
68                    .map_err(|e| anyhow!("When fetching header: {e}"))?;
69                let header_response_string = header_response
70                    .text()
71                    .await
72                    .map_err(|e| anyhow!("When parsing header response: {e}"))?;
73                let crate::fetch::HeaderQueryContent::Json(header_json) =
74                    crate::fetch::get_web_header_json_value(&header_response_string)?
75                else {
76                    return Err(anyhow!(
77                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
78                    ));
79                };
80                (header_url, header_json, header_response_string)
81            }
82            crate::fetch::HeaderQueryContent::Json(value) => {
83                let header_raw = serde_json::to_string(&value)?;
84                (web_url, value, header_raw)
85            }
86        };
87    let data_url_str = header_json
88        .get("data_url")
89        .ok_or_else(|| anyhow!("\"data_url\" not found in header json!"))?
90        .as_str()
91        .ok_or_else(|| anyhow!("\"data_url\" is not a string!"))?;
92    let data_url = header_url.join(data_url_str)?;
93    let data_response = client
94        .get(data_url.clone())
95        .send()
96        .await
97        .map_err(|e| anyhow!("When fetching web: {e}"))?
98        .text()
99        .await
100        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
101    // Remove illegal control characters before parsing while keeping the original data_raw unchanged
102    let data_cleaned = replace_control_chars(&data_response);
103    let data_json: Value = serde_json::from_str(&data_cleaned)?;
104    // Build BmsTable via the crate's deserialization
105    let header: crate::BmsTableHeader = serde_json::from_value(header_json)
106        .map_err(|e| anyhow!("When parsing header json: {e}"))?;
107    let data: crate::BmsTableData =
108        serde_json::from_value(data_json).map_err(|e| anyhow!("When parsing data json: {e}"))?;
109    Ok((
110        BmsTable { header, data },
111        BmsTableRaw {
112            header_json_url: header_url,
113            header_raw,
114            data_json_url: data_url,
115            data_raw: data_response,
116        },
117    ))
118}
119
120/// Fetch and parse a complete BMS difficulty table.
121///
122/// See [`fetch_table_full`].
123pub async fn fetch_table(client: &reqwest::Client, web_url: &str) -> Result<BmsTable> {
124    let (table, _raw) = fetch_table_full(client, web_url).await?;
125    Ok(table)
126}
127
128/// Fetch a list of BMS difficulty tables.
129///
130/// Downloads a JSON array from the provided `web_url` and parses it into a list of [`crate::BmsTableInfo`].
131/// Each item only requires `name`, `symbol`, and `url` (string); all other fields are collected into `extra`.
132pub async fn fetch_table_list(
133    client: &reqwest::Client,
134    web_url: &str,
135) -> Result<Vec<BmsTableInfo>> {
136    let (out, _raw) = fetch_table_list_full(client, web_url).await?;
137    Ok(out)
138}
139
140/// Fetch a list of BMS difficulty tables along with the raw JSON string.
141///
142/// Returns the parsed array of list entries and the raw JSON response text for recording or debugging.
143pub async fn fetch_table_list_full(
144    client: &reqwest::Client,
145    web_url: &str,
146) -> Result<(Vec<BmsTableInfo>, String)> {
147    let web_url = Url::parse(web_url)?;
148    let response_text = client
149        .get(web_url)
150        .send()
151        .await
152        .map_err(|e| anyhow!("When fetching table list: {e}"))?
153        .text()
154        .await
155        .map_err(|e| anyhow!("When parsing table list response: {e}"))?;
156
157    // Remove illegal control characters before parsing while keeping the original response text unchanged
158    let cleaned = replace_control_chars(&response_text);
159    let value: Value = serde_json::from_str(&cleaned)?;
160    let arr = value
161        .as_array()
162        .ok_or_else(|| anyhow!("Table list root is not an array"))?;
163
164    let mut out = Vec::with_capacity(arr.len());
165    for (idx, item) in arr.iter().enumerate() {
166        let obj = item
167            .as_object()
168            .ok_or_else(|| anyhow!("Table list item #{idx} is not an object"))?;
169
170        let name = obj
171            .get("name")
172            .and_then(|v| v.as_str())
173            .ok_or_else(|| anyhow!("Missing required field 'name' at index {idx}"))?;
174        let symbol = obj
175            .get("symbol")
176            .and_then(|v| v.as_str())
177            .ok_or_else(|| anyhow!("Missing required field 'symbol' at index {idx}"))?;
178        let url_str = obj
179            .get("url")
180            .and_then(|v| v.as_str())
181            .ok_or_else(|| anyhow!("Missing required field 'url' at index {idx}"))?;
182        let url = Url::parse(url_str)?;
183
184        #[cfg(feature = "serde")]
185        let extra = {
186            let mut m: BTreeMap<String, Value> = BTreeMap::new();
187            for (k, v) in obj.iter() {
188                if k != "name" && k != "symbol" && k != "url" {
189                    m.insert(k.clone(), v.clone());
190                }
191            }
192            m
193        };
194
195        let entry = BmsTableInfo {
196            name: name.to_string(),
197            symbol: symbol.to_string(),
198            url,
199            #[cfg(feature = "serde")]
200            extra,
201        };
202        out.push(entry);
203    }
204
205    Ok((out, response_text))
206}
207
208/// Create a more lenient and compatible HTTP client.
209///
210/// - Set a browser-like UA;
211/// - Configure timeouts and redirects;
212/// - Accept invalid certificates (for a few non-compliant sites);
213/// - Accept invalid hostnames (for a few non-compliant sites);
214///
215/// Note: use `danger_accept_invalid_certs` with caution in production.
216pub fn make_lenient_client() -> Result<reqwest::Client> {
217    // Default headers emulate real browser behavior more closely
218    let mut headers = HeaderMap::new();
219    headers.insert(
220        HeaderName::from_static("accept"),
221        HeaderValue::from_static(
222            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
223        ),
224    );
225    headers.insert(
226        HeaderName::from_static("accept-language"),
227        HeaderValue::from_static("zh-CN,zh;q=0.9,en;q=0.8"),
228    );
229    headers.insert(
230        HeaderName::from_static("upgrade-insecure-requests"),
231        HeaderValue::from_static("1"),
232    );
233    headers.insert(
234        HeaderName::from_static("connection"),
235        HeaderValue::from_static("keep-alive"),
236    );
237
238    let client = reqwest::Client::builder()
239        .default_headers(headers)
240        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
241        .timeout(Duration::from_secs(60))
242        .redirect(reqwest::redirect::Policy::limited(100))
243        // Automatically include Referer on redirects, closer to browser behavior
244        .referer(true)
245        // Enable cookie store, closer to real user sessions
246        .cookie_store(true)
247        // Keep lenient TLS settings for compatibility with some non-compliant sites
248        .danger_accept_invalid_certs(true)
249        .danger_accept_invalid_hostnames(true)
250        .build()
251        .map_err(|e| anyhow!("When building client: {e}"))?;
252    Ok(client)
253}