bms_table/fetch/
reqwest.rs

1//! 基于 `reqwest` 的网络获取模块
2//!
3//! 提供一站式从网页或头部 JSON 源拉取并解析 BMS 难度表的能力:
4//! - 获取网页并从 HTML 提取 bmstable 头部地址(如有);
5//! - 下载并解析头部 JSON;
6//! - 根据头部中的 `data_url` 下载谱面数据并解析;
7//! - 返回包含表头与谱面集合的 `BmsTable`。
8//!
9//! # 示例
10//!
11//! ```rust,no_run
12//! # #[tokio::main]
13//! # async fn main() -> anyhow::Result<()> {
14//! use bms_table::fetch::reqwest::{fetch_table, make_lenient_client};
15//! let client = make_lenient_client()?;
16//! let table = fetch_table(&client, "https://stellabms.xyz/sl/table.html").await?;
17//! assert!(!table.data.charts.is_empty());
18//! # Ok(())
19//! # }
20//! ```
21#![cfg(feature = "reqwest")]
22
23use anyhow::{Result, anyhow};
24use serde_json::Value;
25use std::collections::BTreeMap;
26use std::time::Duration;
27use url::Url;
28
29use crate::{BmsTable, BmsTableInfo, BmsTableRaw, fetch::replace_control_chars};
30
31/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
32///
33/// # 参数
34///
35/// - `web_url`:网页地址或直接指向头部 JSON 的地址。
36///
37/// # 返回
38///
39/// 解析后的 [`crate::BmsTable`],包含表头与谱面数据。
40///
41/// # 错误
42///
43/// - 网络请求失败(连接失败、超时等)
44/// - 响应内容无法解析为 HTML/JSON 或结构不符合预期
45/// - 头部 JSON 未包含 `data_url` 字段或其类型不正确
46pub async fn fetch_table_full(
47    client: &reqwest::Client,
48    web_url: &str,
49) -> Result<(BmsTable, BmsTableRaw)> {
50    let web_url = Url::parse(web_url)?;
51    let web_response = client
52        .get(web_url.clone())
53        .send()
54        .await
55        .map_err(|e| anyhow!("When fetching web: {e}"))?
56        .text()
57        .await
58        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
59    let (header_url, header_json, header_raw) =
60        match crate::fetch::get_web_header_json_value(&web_response)? {
61            crate::fetch::HeaderQueryContent::Url(header_url_string) => {
62                let header_url = web_url.join(&header_url_string)?;
63                let header_response = client
64                    .get(header_url.clone())
65                    .send()
66                    .await
67                    .map_err(|e| anyhow!("When fetching header: {e}"))?;
68                let header_response_string = header_response
69                    .text()
70                    .await
71                    .map_err(|e| anyhow!("When parsing header response: {e}"))?;
72                let crate::fetch::HeaderQueryContent::Json(header_json) =
73                    crate::fetch::get_web_header_json_value(&header_response_string)?
74                else {
75                    return Err(anyhow!(
76                        "Cycled header found. web_url: {web_url}, header_url: {header_url_string}"
77                    ));
78                };
79                (header_url, header_json, header_response_string)
80            }
81            crate::fetch::HeaderQueryContent::Json(value) => {
82                let header_raw = serde_json::to_string(&value)?;
83                (web_url, value, header_raw)
84            }
85        };
86    let data_url_str = header_json
87        .get("data_url")
88        .ok_or_else(|| anyhow!("\"data_url\" not found in header json!"))?
89        .as_str()
90        .ok_or_else(|| anyhow!("\"data_url\" is not a string!"))?;
91    let data_url = header_url.join(data_url_str)?;
92    let data_response = client
93        .get(data_url)
94        .send()
95        .await
96        .map_err(|e| anyhow!("When fetching web: {e}"))?
97        .text()
98        .await
99        .map_err(|e| anyhow!("When parsing web response: {e}"))?;
100    // 在解析前移除非法控制字符,但保持原始 data_raw 不变
101    let data_cleaned = replace_control_chars(&data_response);
102    let data_json: Value = serde_json::from_str(&data_cleaned)?;
103    // 直接使用库内反序列化生成 BmsTable
104    let header: crate::BmsTableHeader = serde_json::from_value(header_json)
105        .map_err(|e| anyhow!("When parsing header json: {e}"))?;
106    let data: crate::BmsTableData =
107        serde_json::from_value(data_json).map_err(|e| anyhow!("When parsing data json: {e}"))?;
108    Ok((
109        BmsTable { header, data },
110        BmsTableRaw {
111            header_raw,
112            data_raw: data_response,
113        },
114    ))
115}
116
117/// 从网页或头部 JSON 源拉取并解析完整的 BMS 难度表。
118///
119/// 参考 [`fetch_table_full`]。
120pub async fn fetch_table(client: &reqwest::Client, web_url: &str) -> Result<BmsTable> {
121    let (table, _raw) = fetch_table_full(client, web_url).await?;
122    Ok(table)
123}
124
125/// 获取 BMS 难度表列表。
126///
127/// 从提供的 `web_url` 下载 JSON 数组并解析为 [`crate::BmsTableInfo`] 列表。
128/// 仅要求每个元素包含 `name`、`symbol` 与 `url`(字符串),其他字段将被收集到 `extra` 中。
129pub async fn fetch_table_list(
130    client: &reqwest::Client,
131    web_url: &str,
132) -> Result<Vec<BmsTableInfo>> {
133    let (out, _raw) = fetch_table_list_full(client, web_url).await?;
134    Ok(out)
135}
136
137/// 获取 BMS 难度表列表及其原始 JSON 字符串。
138///
139/// 返回解析后的列表项数组与响应的原始 JSON 文本,便于记录或调试。
140pub async fn fetch_table_list_full(
141    client: &reqwest::Client,
142    web_url: &str,
143) -> Result<(Vec<BmsTableInfo>, String)> {
144    let web_url = Url::parse(web_url)?;
145    let response_text = client
146        .get(web_url)
147        .send()
148        .await
149        .map_err(|e| anyhow!("When fetching table list: {e}"))?
150        .text()
151        .await
152        .map_err(|e| anyhow!("When parsing table list response: {e}"))?;
153
154    // 在解析前移除非法控制字符,但保持原始响应文本不变
155    let cleaned = replace_control_chars(&response_text);
156    let value: Value = serde_json::from_str(&cleaned)?;
157    let arr = value
158        .as_array()
159        .ok_or_else(|| anyhow!("Table list root is not an array"))?;
160
161    let mut out = Vec::with_capacity(arr.len());
162    for (idx, item) in arr.iter().enumerate() {
163        let obj = item
164            .as_object()
165            .ok_or_else(|| anyhow!("Table list item #{idx} is not an object"))?;
166
167        let name = obj
168            .get("name")
169            .and_then(|v| v.as_str())
170            .ok_or_else(|| anyhow!("Missing required field 'name' at index {idx}"))?;
171        let symbol = obj
172            .get("symbol")
173            .and_then(|v| v.as_str())
174            .ok_or_else(|| anyhow!("Missing required field 'symbol' at index {idx}"))?;
175        let url_str = obj
176            .get("url")
177            .and_then(|v| v.as_str())
178            .ok_or_else(|| anyhow!("Missing required field 'url' at index {idx}"))?;
179        let url = Url::parse(url_str)?;
180
181        #[cfg(feature = "serde")]
182        let extra = {
183            let mut m: BTreeMap<String, Value> = BTreeMap::new();
184            for (k, v) in obj.iter() {
185                if k != "name" && k != "symbol" && k != "url" {
186                    m.insert(k.clone(), v.clone());
187                }
188            }
189            m
190        };
191
192        let entry = BmsTableInfo {
193            name: name.to_string(),
194            symbol: symbol.to_string(),
195            url,
196            #[cfg(feature = "serde")]
197            extra,
198        };
199        out.push(entry);
200    }
201
202    Ok((out, response_text))
203}
204
205/// 创建一个规则宽松、兼容性更强的 HTTP 客户端。
206///
207/// - 设置浏览器 UA;
208/// - 配置超时与重定向;
209/// - 接受无效证书(用于少数不规范站点);
210/// - 接受无效主机名(用于少数不规范站点);
211///
212/// 注意:生产环境应审慎使用 `danger_accept_invalid_certs`。
213pub fn make_lenient_client() -> Result<reqwest::Client> {
214    let client = reqwest::Client::builder()
215        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119 Safari/537.36 bms-table-rs")
216        .timeout(Duration::from_secs(60))
217        .redirect(reqwest::redirect::Policy::limited(100))
218        .danger_accept_invalid_certs(true)
219        .danger_accept_invalid_hostnames(true)
220        .build()
221        .map_err(|e| anyhow!("When building client: {e}"))?;
222    Ok(client)
223}