bms_table/fetch/
reqwest.rs

1//! 基于 `reqwest` 的网络获取模块
2//!
3//! 提供一站式从网页或头部 JSON 源拉取并解析 BMS 难度表的能力:
4//! - 获取网页并从 HTML 提取 bmstable 头部地址(如有);
5//! - 下载并解析头部 JSON;
6//! - 根据头部中的 `data_url` 下载谱面数据并解析;
7//! - 返回包含表头与谱面集合的 `BmsTable`。
8//!
9//! # 示例
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::{fetch_table, make_lenient_client};
15//! let client = make_lenient_client()?;
16//! let table = fetch_table(&client, "https://stellabms.xyz/sl/table.html").await?;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22use std::collections::BTreeMap;
23
24use anyhow::{Result, anyhow};
25use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
26use serde_json::Value;
27use std::time::Duration;
28use url::Url;
29
30use crate::{BmsTable, BmsTableInfo, BmsTableRaw, fetch::replace_control_chars};
31
32/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
33///
34/// # 参数
35///
36/// - `web_url`:网页地址或直接指向头部 JSON 的地址。
37///
38/// # 返回
39///
40/// 解析后的 [`crate::BmsTable`],包含表头与谱面数据。
41///
42/// # 错误
43///
44/// - 网络请求失败(连接失败、超时等)
45/// - 响应内容无法解析为 HTML/JSON 或结构不符合预期
46/// - 头部 JSON 未包含 `data_url` 字段或其类型不正确
47pub async fn fetch_table_full(
48    client: &reqwest::Client,
49    web_url: &str,
50) -> Result<(BmsTable, BmsTableRaw)> {
51    let web_url = Url::parse(web_url)?;
52    let web_response = client
53        .get(web_url.clone())
54        .send()
55        .await
56        .map_err(|e| anyhow!("When fetching web: {e}"))?
57        .text()
58        .await
59        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
60    let (header_url, header_json, header_raw) =
61        match crate::fetch::get_web_header_json_value(&web_response)? {
62            crate::fetch::HeaderQueryContent::Url(header_url_string) => {
63                let header_url = web_url.join(&header_url_string)?;
64                let header_response = client
65                    .get(header_url.clone())
66                    .send()
67                    .await
68                    .map_err(|e| anyhow!("When fetching header: {e}"))?;
69                let header_response_string = header_response
70                    .text()
71                    .await
72                    .map_err(|e| anyhow!("When parsing header response: {e}"))?;
73                let crate::fetch::HeaderQueryContent::Json(header_json) =
74                    crate::fetch::get_web_header_json_value(&header_response_string)?
75                else {
76                    return Err(anyhow!(
77                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
78                    ));
79                };
80                (header_url, header_json, header_response_string)
81            }
82            crate::fetch::HeaderQueryContent::Json(value) => {
83                let header_raw = serde_json::to_string(&value)?;
84                (web_url, value, header_raw)
85            }
86        };
87    let data_url_str = header_json
88        .get("data_url")
89        .ok_or_else(|| anyhow!("\"data_url\" not found in header json!"))?
90        .as_str()
91        .ok_or_else(|| anyhow!("\"data_url\" is not a string!"))?;
92    let data_url = header_url.join(data_url_str)?;
93    let data_response = client
94        .get(data_url.clone())
95        .send()
96        .await
97        .map_err(|e| anyhow!("When fetching web: {e}"))?
98        .text()
99        .await
100        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
101    // 在解析前移除非法控制字符,但保持原始 data_raw 不变
102    let data_cleaned = replace_control_chars(&data_response);
103    let data_json: Value = serde_json::from_str(&data_cleaned)?;
104    // 直接使用库内反序列化生成 BmsTable
105    let header: crate::BmsTableHeader = serde_json::from_value(header_json)
106        .map_err(|e| anyhow!("When parsing header json: {e}"))?;
107    let data: crate::BmsTableData =
108        serde_json::from_value(data_json).map_err(|e| anyhow!("When parsing data json: {e}"))?;
109    Ok((
110        BmsTable { header, data },
111        BmsTableRaw {
112            header_json_url: header_url,
113            header_raw,
114            data_json_url: data_url,
115            data_raw: data_response,
116        },
117    ))
118}
119
120/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
121///
122/// 参考 [`fetch_table_full`]。
123pub async fn fetch_table(client: &reqwest::Client, web_url: &str) -> Result<BmsTable> {
124    let (table, _raw) = fetch_table_full(client, web_url).await?;
125    Ok(table)
126}
127
128/// 获取 BMS 难度表列表。
129///
130/// 从提供的 `web_url` 下载 JSON 数组并解析为 [`crate::BmsTableInfo`] 列表。
131/// 仅要求每个元素包含 `name`、`symbol` 与 `url`(字符串),其他字段将被收集到 `extra` 中。
132pub async fn fetch_table_list(
133    client: &reqwest::Client,
134    web_url: &str,
135) -> Result<Vec<BmsTableInfo>> {
136    let (out, _raw) = fetch_table_list_full(client, web_url).await?;
137    Ok(out)
138}
139
140/// 获取 BMS 难度表列表及其原始 JSON 字符串。
141///
142/// 返回解析后的列表项数组与响应的原始 JSON 文本,便于记录或调试。
143pub async fn fetch_table_list_full(
144    client: &reqwest::Client,
145    web_url: &str,
146) -> Result<(Vec<BmsTableInfo>, String)> {
147    let web_url = Url::parse(web_url)?;
148    let response_text = client
149        .get(web_url)
150        .send()
151        .await
152        .map_err(|e| anyhow!("When fetching table list: {e}"))?
153        .text()
154        .await
155        .map_err(|e| anyhow!("When parsing table list response: {e}"))?;
156
157    // 在解析前移除非法控制字符,但保持原始响应文本不变
158    let cleaned = replace_control_chars(&response_text);
159    let value: Value = serde_json::from_str(&cleaned)?;
160    let arr = value
161        .as_array()
162        .ok_or_else(|| anyhow!("Table list root is not an array"))?;
163
164    let mut out = Vec::with_capacity(arr.len());
165    for (idx, item) in arr.iter().enumerate() {
166        let obj = item
167            .as_object()
168            .ok_or_else(|| anyhow!("Table list item #{idx} is not an object"))?;
169
170        let name = obj
171            .get("name")
172            .and_then(|v| v.as_str())
173            .ok_or_else(|| anyhow!("Missing required field 'name' at index {idx}"))?;
174        let symbol = obj
175            .get("symbol")
176            .and_then(|v| v.as_str())
177            .ok_or_else(|| anyhow!("Missing required field 'symbol' at index {idx}"))?;
178        let url_str = obj
179            .get("url")
180            .and_then(|v| v.as_str())
181            .ok_or_else(|| anyhow!("Missing required field 'url' at index {idx}"))?;
182        let url = Url::parse(url_str)?;
183
184        #[cfg(feature = "serde")]
185        let extra = {
186            let mut m: BTreeMap<String, Value> = BTreeMap::new();
187            for (k, v) in obj.iter() {
188                if k != "name" && k != "symbol" && k != "url" {
189                    m.insert(k.clone(), v.clone());
190                }
191            }
192            m
193        };
194
195        let entry = BmsTableInfo {
196            name: name.to_string(),
197            symbol: symbol.to_string(),
198            url,
199            #[cfg(feature = "serde")]
200            extra,
201        };
202        out.push(entry);
203    }
204
205    Ok((out, response_text))
206}
207
208/// 创建一个规则宽松、兼容性更强的 HTTP 客户端。
209///
210/// - 设置浏览器 UA;
211/// - 配置超时与重定向;
212/// - 接受无效证书(用于少数不规范站点);
213/// - 接受无效主机名(用于少数不规范站点);
214///
215/// 注意:生产环境应审慎使用 `danger_accept_invalid_certs`。
216pub fn make_lenient_client() -> Result<reqwest::Client> {
217    // 默认请求头更贴近真实浏览器行为
218    let mut headers = HeaderMap::new();
219    headers.insert(
220        HeaderName::from_static("accept"),
221        HeaderValue::from_static(
222            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
223        ),
224    );
225    headers.insert(
226        HeaderName::from_static("accept-language"),
227        HeaderValue::from_static("zh-CN,zh;q=0.9,en;q=0.8"),
228    );
229    headers.insert(
230        HeaderName::from_static("upgrade-insecure-requests"),
231        HeaderValue::from_static("1"),
232    );
233    headers.insert(
234        HeaderName::from_static("connection"),
235        HeaderValue::from_static("keep-alive"),
236    );
237
238    let client = reqwest::Client::builder()
239        .default_headers(headers)
240        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
241        .timeout(Duration::from_secs(60))
242        .redirect(reqwest::redirect::Policy::limited(100))
243        // 使重定向时自动附带 Referer,更接近浏览器行为
244        .referer(true)
245        // 启用 Cookie 存储,更贴近真实用户会话
246        .cookie_store(true)
247        // 为兼容少数不规范站点保留宽松的 TLS 设置
248        .danger_accept_invalid_certs(true)
249        .danger_accept_invalid_hostnames(true)
250        .build()
251        .map_err(|e| anyhow!("When building client: {e}"))?;
252    Ok(client)
253}